{
 "cells": [
  {
   "cell_type": "code",
   "id": "ac52e124-4a85-49ad-a7a8-d538951d70d0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:28.595648Z",
     "start_time": "2025-04-28T08:27:28.537294Z"
    }
   },
   "source": [
    "import numpy as np     #只需要下载numpy库即可\n",
    "import random\n",
    "import GridWorld_v2\n",
    "import time\n",
    "from IPython.display import clear_output"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "id": "02325b8c-eb8e-470f-bf36-9d7aa5d70452",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:28.598993Z",
     "start_time": "2025-04-28T08:27:28.595648Z"
    }
   },
   "source": [
    "# Map layout, one string per grid row. As rendered by show() below:\n",
    "#   '.' -> free cell, '#' -> forbidden cell, 'T' -> target cell.\n",
    "# Other layouts that were tried:\n",
    "#   [\".#\", \".T\"]                 (example 4-1 from Prof. Zhao's course)\n",
    "#   [\"##.T\", \"...#\", \"....\"]     (arbitrary example)\n",
    "#   [\"T.\"]\n",
    "# A random map can be generated instead with\n",
    "#   GridWorld_v2.GridWorld_v2(rows=rows, columns=columns, forbiddenAreaNums=8, targetNums=2, seed=52, forbiddenAreaScore=-10)\n",
    "# in which case rows/columns have to be set by hand.\n",
    "desc = [\".....\", \".##..\", \"..#..\", \".#T#.\", \".#...\"]\n",
    "\n",
    "# Derive the grid size from the layout so it cannot drift out of sync with the map.\n",
    "# Assumes rows = number of strings and columns = length of each string\n",
    "# (TODO confirm against GridWorld_v2 for non-square layouts).\n",
    "rows = len(desc)\n",
    "columns = len(desc[0])\n",
    "\n",
    "gridworld = GridWorld_v2.GridWorld_v2(forbiddenAreaScore=-10, score=1, desc=desc)\n",
    "gridworld.show()\n",
    "\n",
    "value = np.zeros(rows * columns)        # any initial values work; all zeros is fine\n",
    "qtable = np.zeros((rows * columns, 5))  # only the shape matters here; the contents get overwritten\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "⬜️⬜️⬜️⬜️⬜️\n",
      "⬜️🚫🚫⬜️⬜️\n",
      "⬜️⬜️🚫⬜️⬜️\n",
      "⬜️🚫✅🚫⬜️\n",
      "⬜️🚫⬜️⬜️⬜️\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "id": "cb5c9ed9-e3a8-42e1-9fa3-60815143ae25",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:28.605010Z",
     "start_time": "2025-04-28T08:27:28.598993Z"
    }
   },
   "source": [
    "def QLearning_On_Policy(gridworld:GridWorld_v2.GridWorld_v2,gamma = 0.99,trajectorySteps=-1, learning_rate=0.001, final_epsilon=0.01, num_episodes=600)->GridWorld_v2.GridWorld_v2:\n",
    "    \"\"\"\n",
    "    Tabular Q-learning driven by an epsilon-greedy version of the current greedy policy.\n",
    "\n",
    "    Each episode samples one trajectory starting from state 10 with a random first action,\n",
    "    applies a Q-learning update (max over the next state's actions) to every transition of\n",
    "    that trajectory, walking it from the last transition back to the first, and then rebuilds\n",
    "    the greedy policy from the Q-table. Progress is printed after every episode.\n",
    "\n",
    "    The grid size is read from the notebook-level globals `rows` and `columns`.\n",
    "\n",
    "    Parameters:\n",
    "    gridworld (GridWorld_v2.GridWorld_v2): environment used to sample trajectories and to render the policy\n",
    "    gamma (float): discount factor\n",
    "    trajectorySteps (int): -1 samples until the target is reached; any other value is forwarded as the\n",
    "        trajectory length and sampling is not cut short at the target\n",
    "    learning_rate (float): step size applied to the TD error\n",
    "    final_epsilon (float): floor of the exploration rate; epsilon starts at 0.5 and decreases by 0.001\n",
    "        per episode until it reaches this value\n",
    "    num_episodes (int): number of episodes (one sampled trajectory each)\n",
    "\n",
    "    Returns:\n",
    "    GridWorld_v2.GridWorld_v2: the gridworld that was passed in\n",
    "    \"\"\"\n",
    "\n",
    "    n_actions = 5\n",
    "    n_states = rows * columns\n",
    "    action_value = np.zeros((n_states, n_actions))\n",
    "    # Start from a random deterministic policy, one-hot encoded per state.\n",
    "    policy = np.eye(n_actions)[np.random.randint(0, n_actions, size=n_states)]\n",
    "    epsilon = 0.5\n",
    "    # Only stop at the target when no fixed trajectory length was requested.\n",
    "    stop_when_reach_target = trajectorySteps == -1\n",
    "\n",
    "    for episode in range(num_episodes):\n",
    "        # To redraw in place instead of appending output, uncomment:\n",
    "        # time.sleep(0.2)\n",
    "        # clear_output(wait=True)\n",
    "\n",
    "        print(\"episode\",f\"{episode}/{num_episodes}\")\n",
    "        # Linear decay, clamped so epsilon never drops below final_epsilon.\n",
    "        epsilon = max(final_epsilon, epsilon - 0.001)\n",
    "\n",
    "        # p1 is the probability of the greedy action, p0 the probability of each other action.\n",
    "        p1 = 1 - epsilon * ((n_actions - 1) / n_actions)\n",
    "        p0 = epsilon / n_actions\n",
    "        print(\"p1\",p1,\"p0\",p0)\n",
    "        # Epsilon-greedy action probabilities derived from the one-hot greedy policy.\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        # Number of updates applied to each state during this episode.\n",
    "        cnt = [0] * n_states\n",
    "\n",
    "        initState = 10\n",
    "        initAction = random.randint(0, n_actions - 1)\n",
    "\n",
    "        Trajectory = gridworld.getTrajectoryScore(nowState=initState,\n",
    "                                                  action=initAction,\n",
    "                                                  policy=policy_epsilon,\n",
    "                                                  steps=trajectorySteps,\n",
    "                                                  stop_when_reach_target=stop_when_reach_target)\n",
    "        print(\"trajectorySteps\",len(Trajectory))\n",
    "\n",
    "        # Each entry is (state, action, reward, next_state, next_action); the first action is\n",
    "        # included, so the list is one longer than the number of steps. Walk it backwards.\n",
    "        for tmpstate, tmpaction, tmpscore, nextState, _ in reversed(Trajectory):\n",
    "            cnt[tmpstate] += 1\n",
    "            # Q-learning bootstraps from the best next action (the max), unlike Sarsa,\n",
    "            # which would use the action actually taken.\n",
    "            td_target = tmpscore + gamma * action_value[nextState].max()\n",
    "            action_value[tmpstate, tmpaction] += learning_rate * (td_target - action_value[tmpstate, tmpaction])\n",
    "\n",
    "        # Policy improvement: greedy w.r.t. the Q-table, stored one-hot.\n",
    "        policy = np.eye(n_actions)[np.argmax(action_value, axis=1)]\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        # Per-state update counts for this episode.\n",
    "        print(np.array(cnt).reshape(rows, columns))\n",
    "\n",
    "        # State value under the epsilon-greedy policy: expectation of Q over the action probabilities.\n",
    "        state_value = np.sum(policy_epsilon * action_value, axis=1)\n",
    "        mean_state_value = state_value.mean()\n",
    "\n",
    "        gridworld.showPolicy(policy)\n",
    "        print(np.round(state_value,decimals=4).reshape(rows, columns))\n",
    "        print(\"mean_state_value\", mean_state_value)\n",
    "\n",
    "    return gridworld"
   ],
   "outputs": [],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "id": "5defee9b-cf45-4d92-ba2b-a540cdcf4ab9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:29.300741Z",
     "start_time": "2025-04-28T08:27:28.605010Z"
    }
   },
   "source": "QLearning_On_Policy(gridworld) # run with the default hyperparameters; expected to reach the globally optimal policy directly",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 0/600\n",
      "p1 0.6008 p0 0.0998\n",
      "trajectorySteps 20\n",
      "[[2 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [6 1 1 0 0]\n",
      " [1 1 3 0 0]]\n",
      "⬆️⬆️⬆️⬆️⬆️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬆️⏫️⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️⬆️⬆️\n",
      "[[-0.0001  0.      0.      0.      0.    ]\n",
      " [ 0.      0.      0.      0.      0.    ]\n",
      " [ 0.      0.      0.      0.      0.    ]\n",
      " [-0.0013 -0.001  -0.001   0.      0.    ]\n",
      " [ 0.      0.      0.0004  0.      0.    ]]\n",
      "mean_state_value -0.00011963234019152081\n",
      "episode 1/600\n",
      "p1 0.6015999999999999 p0 0.0996\n",
      "trajectorySteps 5795\n",
      "[[ 927  985 1048  988  826]\n",
      " [ 160  182  198  156  149]\n",
      " [  27   27   35   26   35]\n",
      " [   5    4    1    2   10]\n",
      " [   0    0    0    1    3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-0.0522 -0.1525 -0.1502 -0.0451 -0.0456]\n",
      " [-0.0199 -0.0433 -0.0562 -0.0217 -0.0013]\n",
      " [-0.0002 -0.0228 -0.0261 -0.003  -0.0006]\n",
      " [-0.0013 -0.001  -0.002   0.     -0.0002]\n",
      " [ 0.      0.      0.0004 -0.001   0.    ]]\n",
      "mean_state_value -0.025828056997863617\n",
      "episode 2/600\n",
      "p1 0.6024 p0 0.0994\n",
      "trajectorySteps 712\n",
      "[[  4  10  17  67 277]\n",
      " [  2   5   6  45 232]\n",
      " [  2   0   1   5  29]\n",
      " [  1   0   1   1   6]\n",
      " [  0   0   0   0   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-0.0522 -0.154  -0.1508 -0.0453 -0.0494]\n",
      " [-0.0199 -0.0442 -0.0591 -0.0255 -0.0031]\n",
      " [-0.0002 -0.0227 -0.0254 -0.005  -0.0008]\n",
      " [-0.0013 -0.001  -0.003   0.     -0.0004]\n",
      " [ 0.      0.      0.0004 -0.001   0.    ]]\n",
      "mean_state_value -0.02655670035296613\n",
      "episode 3/600\n",
      "p1 0.6032 p0 0.0992\n",
      "trajectorySteps 723\n",
      "[[  2  11  21  80 258]\n",
      " [  3   1   6  50 227]\n",
      " [  6   2   2  11  31]\n",
      " [  1   0   1   0   8]\n",
      " [  0   0   0   0   2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.210e-02 -1.538e-01 -1.514e-01 -4.560e-02 -5.290e-02]\n",
      " [-2.000e-02 -4.410e-02 -5.990e-02 -3.030e-02 -5.300e-03]\n",
      " [-3.000e-04 -2.270e-02 -2.480e-02 -6.900e-03 -1.200e-03]\n",
      " [-1.300e-03 -1.000e-03 -4.000e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.02715226350505197\n",
      "episode 4/600\n",
      "p1 0.604 p0 0.099\n",
      "trajectorySteps 109\n",
      "[[ 1  5  5 16 29]\n",
      " [ 0  6  1  4 24]\n",
      " [ 1  2  2  4  5]\n",
      " [ 0  0  1  1  2]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.200e-02 -1.553e-01 -1.520e-01 -4.570e-02 -5.330e-02]\n",
      " [-1.990e-02 -4.600e-02 -5.980e-02 -3.020e-02 -5.700e-03]\n",
      " [-3.000e-04 -2.460e-02 -2.410e-02 -9.900e-03 -1.200e-03]\n",
      " [-1.300e-03 -1.000e-03 -4.000e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.027495073187471245\n",
      "episode 5/600\n",
      "p1 0.6048 p0 0.0988\n",
      "trajectorySteps 217\n",
      "[[ 1  1  2 17 82]\n",
      " [ 1  0  1  9 62]\n",
      " [ 3  0  1 10 21]\n",
      " [ 0  0  1  1  4]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.190e-02 -1.549e-01 -1.517e-01 -4.560e-02 -5.490e-02]\n",
      " [-1.990e-02 -4.590e-02 -5.970e-02 -3.110e-02 -6.000e-03]\n",
      " [-3.000e-04 -2.450e-02 -2.350e-02 -1.180e-02 -1.600e-03]\n",
      " [-1.300e-03 -1.000e-03 -3.900e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.02763020286873118\n",
      "episode 6/600\n",
      "p1 0.6055999999999999 p0 0.0986\n",
      "trajectorySteps 603\n",
      "[[  9  10  31  80 225]\n",
      " [  5   2  11  35 164]\n",
      " [  5   2   1   6  15]\n",
      " [  1   0   1   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.210e-02 -1.556e-01 -1.568e-01 -4.580e-02 -5.780e-02]\n",
      " [-2.080e-02 -4.580e-02 -6.150e-02 -3.490e-02 -7.300e-03]\n",
      " [-3.000e-04 -2.450e-02 -2.280e-02 -1.180e-02 -1.700e-03]\n",
      " [-1.300e-03 -1.000e-03 -3.900e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.02827580429806577\n",
      "episode 7/600\n",
      "p1 0.6064 p0 0.0984\n",
      "trajectorySteps 59\n",
      "[[ 1  7  2  4 15]\n",
      " [ 1  4  0  0 11]\n",
      " [ 3  0  2  2  3]\n",
      " [ 3  0  1  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.200e-02 -1.587e-01 -1.565e-01 -4.570e-02 -5.790e-02]\n",
      " [-2.080e-02 -4.570e-02 -6.140e-02 -3.480e-02 -7.400e-03]\n",
      " [-4.000e-04 -2.440e-02 -2.310e-02 -1.270e-02 -1.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.028435533900502158\n",
      "episode 8/600\n",
      "p1 0.6072 p0 0.0982\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.190e-02 -1.584e-01 -1.561e-01 -4.570e-02 -5.780e-02]\n",
      " [-2.070e-02 -4.560e-02 -6.120e-02 -3.470e-02 -7.400e-03]\n",
      " [-5.000e-04 -2.540e-02 -2.240e-02 -1.270e-02 -1.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -6.000e-04]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.028396029231735107\n",
      "episode 9/600\n",
      "p1 0.608 p0 0.098\n",
      "trajectorySteps 770\n",
      "[[  9  15  34  96 263]\n",
      " [  3   2   9  47 240]\n",
      " [  3   0   1   6  38]\n",
      " [  1   0   1   1   1]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.190e-02 -1.599e-01 -1.585e-01 -4.630e-02 -6.120e-02]\n",
      " [-2.070e-02 -4.550e-02 -6.400e-02 -3.840e-02 -9.600e-03]\n",
      " [-5.000e-04 -2.530e-02 -2.180e-02 -1.270e-02 -2.100e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -1.600e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.029084577497819394\n",
      "episode 10/600\n",
      "p1 0.6088 p0 0.0978\n",
      "trajectorySteps 1072\n",
      "[[  1   4  35 118 398]\n",
      " [  1   1   8  64 369]\n",
      " [  2   0   1  11  53]\n",
      " [  1   0   1   0   4]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.180e-02 -1.605e-01 -1.592e-01 -4.680e-02 -6.600e-02]\n",
      " [-2.060e-02 -4.540e-02 -6.480e-02 -4.400e-02 -1.270e-02]\n",
      " [-5.000e-04 -2.530e-02 -2.110e-02 -1.360e-02 -2.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -1.600e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.029748492344955895\n",
      "episode 11/600\n",
      "p1 0.6095999999999999 p0 0.09759999999999999\n",
      "trajectorySteps 197\n",
      "[[ 2  5 13 26 64]\n",
      " [ 2  2  4 12 52]\n",
      " [ 3  2  2  1  6]\n",
      " [ 0  0  1  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.170e-02 -1.602e-01 -1.606e-01 -4.670e-02 -6.670e-02]\n",
      " [-2.060e-02 -4.530e-02 -6.750e-02 -4.580e-02 -1.310e-02]\n",
      " [-5.000e-04 -2.620e-02 -2.040e-02 -1.360e-02 -2.900e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -1.600e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.030014554321580574\n",
      "episode 12/600\n",
      "p1 0.6104 p0 0.0974\n",
      "trajectorySteps 1257\n",
      "[[  3   8  28 130 461]\n",
      " [  3   1  13  94 432]\n",
      " [  1   0   3  19  55]\n",
      " [  0   0   1   2   3]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.160e-02 -1.599e-01 -1.621e-01 -4.700e-02 -7.210e-02]\n",
      " [-2.070e-02 -4.520e-02 -6.930e-02 -5.490e-02 -1.670e-02]\n",
      " [-5.000e-04 -2.610e-02 -2.070e-02 -1.650e-02 -3.400e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -2.500e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.031050286918136\n",
      "episode 13/600\n",
      "p1 0.6112 p0 0.0972\n",
      "trajectorySteps 645\n",
      "[[  2   4  17  59 265]\n",
      " [  2   0   7  19 222]\n",
      " [  2   1   2   4  35]\n",
      " [  0   0   1   0   3]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.160e-02 -1.596e-01 -1.636e-01 -4.720e-02 -7.570e-02]\n",
      " [-2.070e-02 -4.520e-02 -7.010e-02 -5.850e-02 -1.900e-02]\n",
      " [-5.000e-04 -2.610e-02 -2.100e-02 -1.740e-02 -3.700e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -2.500e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.031565754504882144\n",
      "episode 14/600\n",
      "p1 0.612 p0 0.097\n",
      "trajectorySteps 478\n",
      "[[  4   2  15  61 172]\n",
      " [  3   0   3  25 144]\n",
      " [  4   1   2   6  27]\n",
      " [  2   0   1   0   3]\n",
      " [  2   0   0   0   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.170e-02 -1.593e-01 -1.641e-01 -4.720e-02 -7.740e-02]\n",
      " [-2.080e-02 -4.510e-02 -7.190e-02 -6.020e-02 -1.990e-02]\n",
      " [-6.000e-04 -2.600e-02 -2.030e-02 -1.740e-02 -3.900e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -2.500e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.03180183755976101\n",
      "episode 15/600\n",
      "p1 0.6128 p0 0.0968\n",
      "trajectorySteps 199\n",
      "[[ 2  3  6 20 60]\n",
      " [ 4  4  0 16 56]\n",
      " [ 5  2  1  3 11]\n",
      " [ 2  1  1  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️⬆️⬆️\n",
      "⬆️⬅️⏬⬆️⬆️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏩️⬆️➡️⬆️\n",
      "[[-5.160e-02 -1.607e-01 -1.638e-01 -4.710e-02 -7.790e-02]\n",
      " [-2.270e-02 -4.500e-02 -7.170e-02 -6.000e-02 -2.040e-02]\n",
      " [-7.000e-04 -2.690e-02 -1.970e-02 -1.830e-02 -4.200e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -3.500e-03]\n",
      " [ 0.000e+00  0.000e+00  4.000e-04 -1.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.03204047793564363\n",
      "episode 16/600\n",
      "p1 0.6135999999999999 p0 0.09659999999999999\n",
      "trajectorySteps 1575\n",
      "[[ 10  19  37 147 603]\n",
      " [  5   4  10  65 541]\n",
      " [  3   3   3  24  77]\n",
      " [  0   0   1   4   7]\n",
      " [  0   2   2   3   5]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.170e-02 -1.605e-01 -1.670e-01 -4.790e-02 -8.460e-02]\n",
      " [-2.270e-02 -4.580e-02 -7.530e-02 -6.350e-02 -2.450e-02]\n",
      " [-8.000e-04 -2.690e-02 -2.050e-02 -2.400e-02 -4.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.900e-03  0.000e+00 -4.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  1.000e-04 -1.100e-03 -2.000e-04]]\n",
      "mean_state_value -0.03329220588222468\n",
      "episode 17/600\n",
      "p1 0.6144000000000001 p0 0.0964\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 1 1 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.150e-02 -1.610e-01 -1.666e-01 -4.780e-02 -8.440e-02]\n",
      " [-2.260e-02 -4.570e-02 -7.520e-02 -6.340e-02 -2.440e-02]\n",
      " [-8.000e-04 -2.680e-02 -2.050e-02 -2.390e-02 -4.900e-03]\n",
      " [-1.300e-03 -1.000e-03 -3.800e-03  0.000e+00 -5.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  7.000e-04 -1.100e-03 -2.000e-04]]\n",
      "mean_state_value -0.03327318999372293\n",
      "episode 18/600\n",
      "p1 0.6152 p0 0.0962\n",
      "trajectorySteps 29\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [4 1 0 0 2]\n",
      " [4 0 1 0 1]\n",
      " [1 1 3 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.140e-02 -1.606e-01 -1.663e-01 -4.770e-02 -8.420e-02]\n",
      " [-2.260e-02 -4.560e-02 -7.500e-02 -6.330e-02 -2.440e-02]\n",
      " [-8.000e-04 -2.680e-02 -2.040e-02 -2.390e-02 -4.900e-03]\n",
      " [-1.400e-03 -1.000e-03 -4.800e-03  0.000e+00 -5.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  3.000e-04 -1.100e-03 -2.000e-04]]\n",
      "mean_state_value -0.03325852249063657\n",
      "episode 19/600\n",
      "p1 0.616 p0 0.096\n",
      "trajectorySteps 29\n",
      "[[2 3 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 2]\n",
      " [0 0 1 0 5]\n",
      " [0 0 1 3 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.130e-02 -1.604e-01 -1.659e-01 -4.760e-02 -8.400e-02]\n",
      " [-2.250e-02 -4.550e-02 -7.490e-02 -6.310e-02 -2.430e-02]\n",
      " [-1.000e-03 -2.670e-02 -2.030e-02 -2.380e-02 -4.900e-03]\n",
      " [-1.400e-03 -1.000e-03 -4.200e-03  0.000e+00 -5.600e-03]\n",
      " [ 0.000e+00 -1.000e-04  1.000e-03 -1.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.03315425867642725\n",
      "episode 20/600\n",
      "p1 0.6168 p0 0.0958\n",
      "trajectorySteps 18\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.120e-02 -1.600e-01 -1.656e-01 -4.750e-02 -8.390e-02]\n",
      " [-2.250e-02 -4.540e-02 -7.470e-02 -6.300e-02 -2.430e-02]\n",
      " [-1.000e-03 -2.660e-02 -2.030e-02 -2.380e-02 -4.900e-03]\n",
      " [-1.400e-03 -1.000e-03 -3.600e-03  0.000e+00 -6.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  1.600e-03 -1.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.03307215761142106\n",
      "episode 21/600\n",
      "p1 0.6175999999999999 p0 0.09559999999999999\n",
      "trajectorySteps 23\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 2 4 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.110e-02 -1.597e-01 -1.653e-01 -4.740e-02 -8.370e-02]\n",
      " [-2.240e-02 -4.530e-02 -7.460e-02 -6.290e-02 -2.420e-02]\n",
      " [-1.000e-03 -2.660e-02 -2.020e-02 -2.370e-02 -4.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -2.900e-03  0.000e+00 -7.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  2.000e-04 -1.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.033069444713759825\n",
      "episode 22/600\n",
      "p1 0.6184000000000001 p0 0.0954\n",
      "trajectorySteps 26\n",
      "[[2 1 1 1 3]\n",
      " [1 0 0 1 4]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 1 3 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.110e-02 -1.594e-01 -1.649e-01 -4.730e-02 -8.350e-02]\n",
      " [-2.240e-02 -4.530e-02 -7.440e-02 -6.270e-02 -2.420e-02]\n",
      " [-9.000e-04 -2.650e-02 -2.010e-02 -2.370e-02 -4.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -2.300e-03  0.000e+00 -7.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  8.000e-04 -2.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.032991915766708034\n",
      "episode 23/600\n",
      "p1 0.6192 p0 0.09519999999999999\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 2]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.100e-02 -1.590e-01 -1.646e-01 -4.720e-02 -8.340e-02]\n",
      " [-2.240e-02 -4.520e-02 -7.420e-02 -6.260e-02 -2.410e-02]\n",
      " [-9.000e-04 -2.650e-02 -2.000e-02 -2.360e-02 -4.800e-03]\n",
      " [-1.400e-03 -1.000e-03 -1.700e-03  0.000e+00 -7.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  1.500e-03 -2.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.032874554964280475\n",
      "episode 24/600\n",
      "p1 0.62 p0 0.095\n",
      "trajectorySteps 29\n",
      "[[2 4 1 2 3]\n",
      " [2 2 0 1 3]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.090e-02 -1.604e-01 -1.642e-01 -4.710e-02 -8.330e-02]\n",
      " [-2.250e-02 -4.510e-02 -7.410e-02 -6.250e-02 -2.410e-02]\n",
      " [-1.000e-03 -2.640e-02 -2.000e-02 -2.360e-02 -4.900e-03]\n",
      " [-1.400e-03 -9.000e-04 -1.000e-03  0.000e+00 -7.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  2.100e-03 -2.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.03283857885723428\n",
      "episode 25/600\n",
      "p1 0.6208 p0 0.0948\n",
      "trajectorySteps 33\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 0 2]\n",
      " [3 0 0 1 3]\n",
      " [1 0 1 2 4]\n",
      " [0 0 1 3 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.080e-02 -1.600e-01 -1.639e-01 -4.700e-02 -8.310e-02]\n",
      " [-2.240e-02 -4.500e-02 -7.390e-02 -6.240e-02 -2.410e-02]\n",
      " [-1.000e-03 -2.640e-02 -1.990e-02 -2.350e-02 -4.900e-03]\n",
      " [-1.400e-03 -9.000e-04 -1.000e-03  0.000e+00 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  2.700e-03 -2.900e-03 -2.000e-04]]\n",
      "mean_state_value -0.032826527295953506\n",
      "episode 26/600\n",
      "p1 0.6215999999999999 p0 0.09459999999999999\n",
      "trajectorySteps 33\n",
      "[[4 3 3 6 2]\n",
      " [3 1 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.070e-02 -1.605e-01 -1.635e-01 -4.700e-02 -8.290e-02]\n",
      " [-2.240e-02 -4.490e-02 -7.380e-02 -6.220e-02 -2.400e-02]\n",
      " [-1.100e-03 -2.630e-02 -1.980e-02 -2.350e-02 -5.000e-03]\n",
      " [-1.400e-03 -9.000e-04 -1.900e-03  0.000e+00 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  3.400e-03 -3.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.03281542607681288\n",
      "episode 27/600\n",
      "p1 0.6224000000000001 p0 0.0944\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.060e-02 -1.602e-01 -1.632e-01 -4.690e-02 -8.280e-02]\n",
      " [-2.230e-02 -4.480e-02 -7.360e-02 -6.210e-02 -2.400e-02]\n",
      " [-1.100e-03 -2.630e-02 -1.970e-02 -2.340e-02 -5.000e-03]\n",
      " [-1.400e-03 -9.000e-04 -1.300e-03  0.000e+00 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  4.000e-03 -3.000e-03 -2.000e-04]]\n",
      "mean_state_value -0.03269327639417408\n",
      "episode 28/600\n",
      "p1 0.6232 p0 0.09419999999999999\n",
      "trajectorySteps 31\n",
      "[[4 5 2 2 1]\n",
      " [3 1 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 3]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.060e-02 -1.607e-01 -1.628e-01 -4.680e-02 -8.260e-02]\n",
      " [-2.240e-02 -4.470e-02 -7.350e-02 -6.200e-02 -2.390e-02]\n",
      " [-1.200e-03 -2.620e-02 -1.970e-02 -2.340e-02 -5.000e-03]\n",
      " [-1.400e-03 -9.000e-04 -7.000e-04  0.000e+00 -8.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  4.700e-03 -2.900e-03 -2.000e-04]]\n",
      "mean_state_value -0.032615524371436805\n",
      "episode 29/600\n",
      "p1 0.624 p0 0.094\n",
      "trajectorySteps 29\n",
      "[[1 2 3 3 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 2 2]\n",
      " [0 1 2 3 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏬⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.050e-02 -1.604e-01 -1.625e-01 -4.670e-02 -8.240e-02]\n",
      " [-2.230e-02 -4.460e-02 -7.330e-02 -6.180e-02 -2.390e-02]\n",
      " [-1.200e-03 -2.610e-02 -1.960e-02 -2.330e-02 -4.900e-03]\n",
      " [-1.400e-03 -9.000e-04 -1.600e-03 -9.000e-04 -8.400e-03]\n",
      " [ 0.000e+00 -1.000e-04  4.400e-03 -3.900e-03 -2.000e-04]]\n",
      "mean_state_value -0.03266762895016175\n",
      "episode 30/600\n",
      "p1 0.6248 p0 0.0938\n",
      "trajectorySteps 18\n",
      "[[0 1 1 1 0]\n",
      " [1 1 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 3]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.030e-02 -1.600e-01 -1.621e-01 -4.660e-02 -8.220e-02]\n",
      " [-2.320e-02 -4.450e-02 -7.320e-02 -6.170e-02 -2.380e-02]\n",
      " [-1.200e-03 -2.610e-02 -1.950e-02 -2.330e-02 -4.900e-03]\n",
      " [-1.400e-03 -9.000e-04 -9.000e-04 -3.000e-04 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  4.400e-03 -4.800e-03 -2.000e-04]]\n",
      "mean_state_value -0.03262246795765391\n",
      "episode 31/600\n",
      "p1 0.6255999999999999 p0 0.09359999999999999\n",
      "trajectorySteps 17\n",
      "[[3 4 2 0 0]\n",
      " [1 0 2 0 0]\n",
      " [3 0 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.020e-02 -1.597e-01 -1.634e-01 -4.650e-02 -8.210e-02]\n",
      " [-2.310e-02 -4.440e-02 -7.390e-02 -6.160e-02 -2.380e-02]\n",
      " [-1.300e-03 -2.600e-02 -1.880e-02 -2.320e-02 -4.900e-03]\n",
      " [-1.400e-03 -9.000e-04 -3.000e-04 -3.000e-04 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  4.400e-03 -4.800e-03 -2.000e-04]]\n",
      "mean_state_value -0.03260705346143211\n",
      "episode 32/600\n",
      "p1 0.6264000000000001 p0 0.0934\n",
      "trajectorySteps 34\n",
      "[[1 1 2 1 0]\n",
      " [1 0 1 1 2]\n",
      " [5 0 0 0 1]\n",
      " [6 0 1 0 1]\n",
      " [1 0 2 4 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.010e-02 -1.594e-01 -1.639e-01 -4.640e-02 -8.190e-02]\n",
      " [-2.310e-02 -4.430e-02 -7.370e-02 -6.140e-02 -2.380e-02]\n",
      " [-1.400e-03 -2.600e-02 -1.870e-02 -2.320e-02 -4.900e-03]\n",
      " [-1.500e-03 -9.000e-04  4.000e-04 -3.000e-04 -8.500e-03]\n",
      " [ 0.000e+00 -1.000e-04  5.000e-03 -4.800e-03 -3.000e-04]]\n",
      "mean_state_value -0.032529520201912715\n",
      "episode 33/600\n",
      "p1 0.6272 p0 0.09319999999999999\n",
      "trajectorySteps 29\n",
      "[[3 2 1 1 0]\n",
      " [3 0 0 1 0]\n",
      " [2 1 1 1 4]\n",
      " [0 1 1 0 2]\n",
      " [0 1 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0501 -0.1591 -0.1635 -0.0463 -0.0817]\n",
      " [-0.023  -0.0442 -0.0736 -0.0613 -0.0238]\n",
      " [-0.0015 -0.0268 -0.0181 -0.0231 -0.005 ]\n",
      " [-0.0015 -0.0009  0.001  -0.0003 -0.0085]\n",
      " [ 0.     -0.001   0.0041 -0.0047 -0.0003]]\n",
      "mean_state_value -0.03252971647737061\n",
      "episode 34/600\n",
      "p1 0.628 p0 0.093\n",
      "trajectorySteps 21\n",
      "[[1 1 2 4 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.000e-02 -1.588e-01 -1.632e-01 -4.630e-02 -8.150e-02]\n",
      " [-2.300e-02 -4.410e-02 -7.340e-02 -6.120e-02 -2.380e-02]\n",
      " [-1.500e-03 -2.680e-02 -1.800e-02 -2.310e-02 -5.000e-03]\n",
      " [-1.500e-03 -9.000e-04  1.000e-04 -3.000e-04 -8.400e-03]\n",
      " [ 0.000e+00 -1.000e-03  4.700e-03 -4.700e-03 -3.000e-04]]\n",
      "mean_state_value -0.03247776500342112\n",
      "episode 35/600\n",
      "p1 0.6288 p0 0.0928\n",
      "trajectorySteps 25\n",
      "[[2 4 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 5 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0499 -0.1585 -0.1628 -0.0462 -0.0814]\n",
      " [-0.0229 -0.044  -0.0733 -0.061  -0.0237]\n",
      " [-0.0015 -0.0267 -0.0179 -0.023  -0.005 ]\n",
      " [-0.0015 -0.0009 -0.0008 -0.0003 -0.0084]\n",
      " [ 0.     -0.001   0.0054 -0.0047 -0.0003]]\n",
      "mean_state_value -0.032419897062495664\n",
      "episode 36/600\n",
      "p1 0.6295999999999999 p0 0.09259999999999999\n",
      "trajectorySteps 22\n",
      "[[1 2 1 1 1]\n",
      " [1 0 1 1 1]\n",
      " [2 1 0 0 3]\n",
      " [0 0 1 2 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0498 -0.1582 -0.1633 -0.0461 -0.0813]\n",
      " [-0.0229 -0.0439 -0.0731 -0.0609 -0.0237]\n",
      " [-0.0015 -0.0267 -0.0178 -0.023  -0.0051]\n",
      " [-0.0015 -0.0009 -0.0017 -0.0006 -0.0084]\n",
      " [ 0.     -0.001   0.0054 -0.0056 -0.0003]]\n",
      "mean_state_value -0.03247249661408788\n",
      "episode 37/600\n",
      "p1 0.6304000000000001 p0 0.0924\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 1 0 0]\n",
      " [0 1 1 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0497 -0.1578 -0.163  -0.046  -0.0811]\n",
      " [-0.0229 -0.0438 -0.073  -0.0608 -0.0236]\n",
      " [-0.0015 -0.0266 -0.0178 -0.0229 -0.0051]\n",
      " [-0.0024 -0.0018 -0.001  -0.0006 -0.0084]\n",
      " [ 0.     -0.001   0.006  -0.0056 -0.0003]]\n",
      "mean_state_value -0.032421194197067425\n",
      "episode 38/600\n",
      "p1 0.6312 p0 0.09219999999999999\n",
      "trajectorySteps 24\n",
      "[[2 1 2 2 1]\n",
      " [1 0 1 0 1]\n",
      " [3 1 0 0 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0496 -0.1575 -0.1634 -0.046  -0.0809]\n",
      " [-0.0228 -0.0437 -0.0728 -0.0606 -0.0236]\n",
      " [-0.0015 -0.0266 -0.0177 -0.0229 -0.0051]\n",
      " [-0.0024 -0.0018 -0.0004 -0.0006 -0.0084]\n",
      " [ 0.     -0.001   0.0067 -0.0056 -0.0003]]\n",
      "mean_state_value -0.03233173498136066\n",
      "episode 39/600\n",
      "p1 0.632 p0 0.092\n",
      "trajectorySteps 17\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 3]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0495 -0.1572 -0.163  -0.0459 -0.0807]\n",
      " [-0.0237 -0.0436 -0.0726 -0.0605 -0.0235]\n",
      " [-0.0015 -0.0265 -0.0176 -0.0228 -0.0051]\n",
      " [-0.0024 -0.0018  0.0003 -0.0006 -0.0084]\n",
      " [ 0.     -0.001   0.0073 -0.0055 -0.0003]]\n",
      "mean_state_value -0.03224923232172349\n",
      "episode 40/600\n",
      "p1 0.6328 p0 0.09179999999999999\n",
      "trajectorySteps 30\n",
      "[[2 1 0 0 0]\n",
      " [2 1 1 2 1]\n",
      " [2 0 0 2 6]\n",
      " [0 0 1 0 4]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0495 -0.1576 -0.1627 -0.0458 -0.0806]\n",
      " [-0.0236 -0.0444 -0.0725 -0.0604 -0.0235]\n",
      " [-0.0015 -0.0264 -0.0175 -0.0228 -0.0053]\n",
      " [-0.0024 -0.0018 -0.0006 -0.0006 -0.0084]\n",
      " [ 0.     -0.001   0.0079 -0.0055 -0.0004]]\n",
      "mean_state_value -0.03226920927616855\n",
      "episode 41/600\n",
      "p1 0.6335999999999999 p0 0.09159999999999999\n",
      "trajectorySteps 23\n",
      "[[1 2 4 2 1]\n",
      " [1 0 0 0 3]\n",
      " [1 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.930e-02 -1.573e-01 -1.624e-01 -4.570e-02 -8.040e-02]\n",
      " [-2.350e-02 -4.430e-02 -7.230e-02 -6.020e-02 -2.350e-02]\n",
      " [-1.500e-03 -2.640e-02 -1.750e-02 -2.270e-02 -5.300e-03]\n",
      " [-2.400e-03 -1.800e-03  1.000e-04 -6.000e-04 -8.400e-03]\n",
      " [ 0.000e+00 -1.000e-03  8.600e-03 -5.500e-03 -4.000e-04]]\n",
      "mean_state_value -0.03214756301657896\n",
      "episode 42/600\n",
      "p1 0.6344000000000001 p0 0.0914\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0493 -0.1569 -0.162  -0.0456 -0.0802]\n",
      " [-0.0235 -0.0442 -0.0722 -0.0601 -0.0234]\n",
      " [-0.0015 -0.0263 -0.0174 -0.0227 -0.0052]\n",
      " [-0.0024 -0.0018  0.0008 -0.0005 -0.0084]\n",
      " [ 0.     -0.001   0.0092 -0.0055 -0.0004]]\n",
      "mean_state_value -0.03202807990189859\n",
      "episode 43/600\n",
      "p1 0.6352 p0 0.09119999999999999\n",
      "trajectorySteps 34\n",
      "[[2 5 3 1 2]\n",
      " [1 2 2 1 5]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.920e-02 -1.582e-01 -1.617e-01 -4.550e-02 -8.010e-02]\n",
      " [-2.340e-02 -4.500e-02 -7.290e-02 -6.000e-02 -2.340e-02]\n",
      " [-1.500e-03 -2.630e-02 -1.730e-02 -2.260e-02 -5.200e-03]\n",
      " [-2.400e-03 -1.800e-03 -1.000e-04 -5.000e-04 -8.400e-03]\n",
      " [ 0.000e+00 -1.000e-03  9.900e-03 -5.500e-03 -4.000e-04]]\n",
      "mean_state_value -0.03210604771751295\n",
      "episode 44/600\n",
      "p1 0.636 p0 0.091\n",
      "trajectorySteps 26\n",
      "[[3 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 1 0 1]\n",
      " [0 2 3 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0492 -0.1579 -0.1613 -0.0454 -0.0799]\n",
      " [-0.0234 -0.0449 -0.0727 -0.0599 -0.0233]\n",
      " [-0.0015 -0.0262 -0.0172 -0.0226 -0.0052]\n",
      " [-0.0024 -0.0018  0.0006 -0.0005 -0.0084]\n",
      " [ 0.     -0.0009  0.0088 -0.0056 -0.0005]]\n",
      "mean_state_value -0.03206253020710655\n",
      "episode 45/600\n",
      "p1 0.6368 p0 0.09079999999999999\n",
      "trajectorySteps 11\n",
      "[[1 3 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [2 1 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0491 -0.1584 -0.161  -0.0453 -0.0797]\n",
      " [-0.0233 -0.0448 -0.0726 -0.0597 -0.0233]\n",
      " [-0.0015 -0.0271 -0.0165 -0.0225 -0.0052]\n",
      " [-0.0024 -0.0018 -0.0003 -0.0005 -0.0084]\n",
      " [ 0.     -0.0009  0.0088 -0.0056 -0.0005]]\n",
      "mean_state_value -0.032068862197060254\n",
      "episode 46/600\n",
      "p1 0.6376 p0 0.09059999999999999\n",
      "trajectorySteps 28\n",
      "[[1 2 4 1 0]\n",
      " [2 1 0 1 2]\n",
      " [5 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 3 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.049  -0.1589 -0.1607 -0.0452 -0.0796]\n",
      " [-0.0233 -0.0447 -0.0724 -0.0596 -0.0233]\n",
      " [-0.0017 -0.027  -0.0164 -0.0225 -0.0052]\n",
      " [-0.0024 -0.0018  0.0004 -0.0005 -0.0084]\n",
      " [ 0.     -0.0009  0.0085 -0.0055 -0.0004]]\n",
      "mean_state_value -0.032025036908827076\n",
      "episode 47/600\n",
      "p1 0.6384000000000001 p0 0.0904\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0489 -0.1585 -0.1604 -0.0451 -0.0794]\n",
      " [-0.0232 -0.0446 -0.0723 -0.0595 -0.0232]\n",
      " [-0.0017 -0.0269 -0.0164 -0.0224 -0.0053]\n",
      " [-0.0023 -0.0018  0.001  -0.0005 -0.0084]\n",
      " [ 0.     -0.0009  0.0092 -0.0055 -0.0004]]\n",
      "mean_state_value -0.03190030346569454\n",
      "episode 48/600\n",
      "p1 0.6392 p0 0.09019999999999999\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 0 1 0 0]\n",
      " [1 1 1 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0488 -0.1582 -0.16   -0.045  -0.0792]\n",
      " [-0.0232 -0.0445 -0.0721 -0.0593 -0.0232]\n",
      " [-0.0017 -0.0269 -0.0163 -0.0224 -0.0053]\n",
      " [-0.0023 -0.0018  0.0017 -0.0005 -0.0084]\n",
      " [-0.0009 -0.0009  0.0098 -0.0055 -0.0004]]\n",
      "mean_state_value -0.03180808305244272\n",
      "episode 49/600\n",
      "p1 0.64 p0 0.09\n",
      "trajectorySteps 33\n",
      "[[2 2 1 4 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 1 4]\n",
      " [4 1 1 0 2]\n",
      " [1 1 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0488 -0.1579 -0.1596 -0.045  -0.079 ]\n",
      " [-0.0231 -0.0444 -0.0719 -0.0592 -0.0231]\n",
      " [-0.0017 -0.0268 -0.0162 -0.0223 -0.0053]\n",
      " [-0.0032 -0.0027  0.0018 -0.0005 -0.0083]\n",
      " [-0.0009 -0.0009  0.0105 -0.0055 -0.0004]]\n",
      "mean_state_value -0.031790955897472474\n",
      "episode 50/600\n",
      "p1 0.6408 p0 0.08979999999999999\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 2]\n",
      " [2 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0487 -0.1575 -0.1593 -0.0449 -0.0789]\n",
      " [-0.0231 -0.0443 -0.0718 -0.0591 -0.0232]\n",
      " [-0.0018 -0.0268 -0.0161 -0.0223 -0.0053]\n",
      " [-0.0032 -0.0027  0.0025  0.0002 -0.0092]\n",
      " [-0.0009 -0.0009  0.0106 -0.0054 -0.0004]]\n",
      "mean_state_value -0.0317040158336584\n",
      "episode 51/600\n",
      "p1 0.6416 p0 0.08959999999999999\n",
      "trajectorySteps 51\n",
      "[[7 5 5 3 2]\n",
      " [6 1 0 0 3]\n",
      " [8 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 4 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0487 -0.1579 -0.159  -0.0449 -0.0787]\n",
      " [-0.023  -0.0442 -0.0716 -0.0589 -0.0231]\n",
      " [-0.0019 -0.0267 -0.0161 -0.0222 -0.0053]\n",
      " [-0.0032 -0.0027  0.0031  0.0002 -0.0092]\n",
      " [-0.0009 -0.0009  0.0111 -0.0054 -0.0004]]\n",
      "mean_state_value -0.03162662213945714\n",
      "episode 52/600\n",
      "p1 0.6424000000000001 p0 0.0894\n",
      "trajectorySteps 21\n",
      "[[2 1 5 3 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 1 2 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.860e-02 -1.576e-01 -1.587e-01 -4.480e-02 -7.850e-02]\n",
      " [-2.300e-02 -4.410e-02 -7.150e-02 -5.880e-02 -2.300e-02]\n",
      " [-1.900e-03 -2.660e-02 -1.600e-02 -2.310e-02 -5.300e-03]\n",
      " [-3.200e-03 -2.700e-03  3.800e-03 -1.000e-04 -9.200e-03]\n",
      " [-9.000e-04 -9.000e-04  1.110e-02 -5.400e-03 -4.000e-04]]\n",
      "mean_state_value -0.03157225524884549\n",
      "episode 53/600\n",
      "p1 0.6432 p0 0.08919999999999999\n",
      "trajectorySteps 28\n",
      "[[1 1 2 3 2]\n",
      " [2 0 0 1 3]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.850e-02 -1.572e-01 -1.584e-01 -4.470e-02 -7.830e-02]\n",
      " [-2.290e-02 -4.400e-02 -7.130e-02 -5.870e-02 -2.300e-02]\n",
      " [-1.900e-03 -2.660e-02 -1.590e-02 -2.300e-02 -5.300e-03]\n",
      " [-3.200e-03 -2.700e-03  4.500e-03 -1.000e-04 -9.200e-03]\n",
      " [-9.000e-04 -9.000e-04  1.180e-02 -5.400e-03 -5.000e-04]]\n",
      "mean_state_value -0.031454931994354604\n",
      "episode 54/600\n",
      "p1 0.644 p0 0.089\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.840e-02 -1.569e-01 -1.580e-01 -4.460e-02 -7.820e-02]\n",
      " [-2.290e-02 -4.390e-02 -7.110e-02 -5.850e-02 -2.290e-02]\n",
      " [-2.000e-03 -2.650e-02 -1.580e-02 -2.300e-02 -5.300e-03]\n",
      " [-3.200e-03 -2.700e-03  5.200e-03 -1.000e-04 -9.200e-03]\n",
      " [-9.000e-04 -9.000e-04  1.250e-02 -5.300e-03 -5.000e-04]]\n",
      "mean_state_value -0.03132856673115667\n",
      "episode 55/600\n",
      "p1 0.6448 p0 0.08879999999999999\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 3]\n",
      " [3 1 0 0 4]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0483 -0.1565 -0.1577 -0.0445 -0.078 ]\n",
      " [-0.0228 -0.0438 -0.071  -0.0584 -0.023 ]\n",
      " [-0.002  -0.0265 -0.0158 -0.0229 -0.0054]\n",
      " [-0.0032 -0.0027  0.0059 -0.     -0.0092]\n",
      " [-0.0009 -0.0009  0.0132 -0.0053 -0.0005]]\n",
      "mean_state_value -0.031207872763700323\n",
      "episode 56/600\n",
      "p1 0.6456 p0 0.08859999999999998\n",
      "trajectorySteps 36\n",
      "[[1 2 2 0 0]\n",
      " [7 1 2 1 1]\n",
      " [8 2 0 0 1]\n",
      " [2 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0482 -0.1562 -0.1581 -0.0444 -0.0778]\n",
      " [-0.0237 -0.0438 -0.0717 -0.0583 -0.0229]\n",
      " [-0.0022 -0.0264 -0.0157 -0.0229 -0.0054]\n",
      " [-0.0032 -0.0027  0.0051 -0.     -0.0092]\n",
      " [-0.0009 -0.0009  0.0139 -0.0053 -0.0005]]\n",
      "mean_state_value -0.031249120045569484\n",
      "episode 57/600\n",
      "p1 0.6464000000000001 p0 0.08839999999999999\n",
      "trajectorySteps 32\n",
      "[[2 5 3 3 0]\n",
      " [4 2 1 1 1]\n",
      " [4 1 0 0 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0481 -0.1567 -0.1585 -0.0443 -0.0776]\n",
      " [-0.0245 -0.0437 -0.0715 -0.0581 -0.0229]\n",
      " [-0.0023 -0.0263 -0.0156 -0.0228 -0.0055]\n",
      " [-0.0032 -0.0026  0.0042  0.0006 -0.01  ]\n",
      " [-0.0009 -0.0009  0.0139 -0.0053 -0.0005]]\n",
      "mean_state_value -0.031324878363897246\n",
      "episode 58/600\n",
      "p1 0.6472 p0 0.08819999999999999\n",
      "trajectorySteps 22\n",
      "[[0 1 1 2 1]\n",
      " [1 1 0 0 3]\n",
      " [2 1 0 0 3]\n",
      " [0 0 1 2 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.048  -0.1563 -0.1582 -0.0442 -0.0775]\n",
      " [-0.0253 -0.0436 -0.0714 -0.058  -0.0228]\n",
      " [-0.0023 -0.0263 -0.0155 -0.0228 -0.0056]\n",
      " [-0.0032 -0.0026  0.0049  0.0004 -0.01  ]\n",
      " [-0.0009 -0.0009  0.0139 -0.0061 -0.0005]]\n",
      "mean_state_value -0.031301777528074115\n",
      "episode 59/600\n",
      "p1 0.648 p0 0.088\n",
      "trajectorySteps 23\n",
      "[[1 3 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 4 3 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0479 -0.156  -0.1579 -0.0441 -0.0773]\n",
      " [-0.0253 -0.0435 -0.0712 -0.0579 -0.0228]\n",
      " [-0.0023 -0.0262 -0.0155 -0.0227 -0.0055]\n",
      " [-0.0032 -0.0026  0.0041  0.0004 -0.01  ]\n",
      " [-0.0009 -0.0018  0.0129 -0.0061 -0.0005]]\n",
      "mean_state_value -0.03134152202823941\n",
      "episode 60/600\n",
      "p1 0.6488 p0 0.08779999999999999\n",
      "trajectorySteps 22\n",
      "[[1 2 2 1 0]\n",
      " [2 0 0 1 1]\n",
      " [4 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0477 -0.1557 -0.1575 -0.044  -0.0771]\n",
      " [-0.0252 -0.0434 -0.071  -0.0577 -0.0227]\n",
      " [-0.0023 -0.0262 -0.0154 -0.0227 -0.0055]\n",
      " [-0.0032 -0.0026  0.0033  0.0005 -0.01  ]\n",
      " [-0.0009 -0.0018  0.0136 -0.006  -0.0005]]\n",
      "mean_state_value -0.03127773583700473\n",
      "episode 61/600\n",
      "p1 0.6496 p0 0.08759999999999998\n",
      "trajectorySteps 25\n",
      "[[1 1 1 2 0]\n",
      " [1 0 1 2 2]\n",
      " [4 2 0 0 2]\n",
      " [0 0 1 1 3]\n",
      " [0 0 0 0 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0476 -0.1554 -0.1579 -0.044  -0.0769]\n",
      " [-0.0252 -0.0433 -0.0709 -0.0576 -0.0226]\n",
      " [-0.0022 -0.0261 -0.0153 -0.0226 -0.0055]\n",
      " [-0.0031 -0.0026  0.004   0.0011 -0.011 ]\n",
      " [-0.0009 -0.0018  0.0136 -0.006  -0.0005]]\n",
      "mean_state_value -0.031216059770542134\n",
      "episode 62/600\n",
      "p1 0.6504000000000001 p0 0.08739999999999999\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 2]\n",
      " [1 0 1 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0475 -0.155  -0.1576 -0.0439 -0.0768]\n",
      " [-0.0251 -0.0432 -0.0707 -0.0575 -0.0226]\n",
      " [-0.0022 -0.026  -0.0152 -0.0226 -0.0055]\n",
      " [-0.0031 -0.0026  0.0032  0.0018 -0.0118]\n",
      " [-0.0009 -0.0018  0.0137 -0.006  -0.0005]]\n",
      "mean_state_value -0.031181410779813654\n",
      "episode 63/600\n",
      "p1 0.6512 p0 0.08719999999999999\n",
      "trajectorySteps 24\n",
      "[[0 1 1 2 4]\n",
      " [1 1 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0474 -0.1547 -0.1572 -0.0438 -0.0766]\n",
      " [-0.0259 -0.0431 -0.0706 -0.0574 -0.0225]\n",
      " [-0.0022 -0.026  -0.0151 -0.0225 -0.0055]\n",
      " [-0.0031 -0.0026  0.0024  0.0018 -0.0118]\n",
      " [-0.0009 -0.0018  0.0144 -0.006  -0.0006]]\n",
      "mean_state_value -0.03114675302426301\n",
      "episode 64/600\n",
      "p1 0.652 p0 0.087\n",
      "trajectorySteps 24\n",
      "[[1 1 2 1 2]\n",
      " [2 0 0 0 2]\n",
      " [4 1 0 0 1]\n",
      " [2 2 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0473 -0.1543 -0.1569 -0.0437 -0.0765]\n",
      " [-0.0258 -0.043  -0.0704 -0.0572 -0.0225]\n",
      " [-0.0022 -0.0259 -0.0151 -0.0225 -0.0055]\n",
      " [-0.004  -0.0035  0.0031  0.0025 -0.0126]\n",
      " [-0.0009 -0.0018  0.0144 -0.006  -0.0006]]\n",
      "mean_state_value -0.031119659869548396\n",
      "episode 65/600\n",
      "p1 0.6528 p0 0.08679999999999999\n",
      "trajectorySteps 26\n",
      "[[0 1 3 3 3]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 1 4]\n",
      " [1 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0472 -0.154  -0.1566 -0.0436 -0.0763]\n",
      " [-0.0266 -0.0429 -0.0702 -0.0571 -0.0224]\n",
      " [-0.0022 -0.0259 -0.015  -0.0224 -0.0056]\n",
      " [-0.004  -0.0035  0.0038  0.0032 -0.0126]\n",
      " [-0.0009 -0.0018  0.0145 -0.0068 -0.0006]]\n",
      "mean_state_value -0.03106302941127125\n",
      "episode 66/600\n",
      "p1 0.6536 p0 0.08659999999999998\n",
      "trajectorySteps 24\n",
      "[[1 1 1 3 5]\n",
      " [1 0 0 0 3]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0471 -0.1536 -0.1562 -0.0435 -0.0761]\n",
      " [-0.0266 -0.0428 -0.0701 -0.057  -0.0224]\n",
      " [-0.0022 -0.0258 -0.0149 -0.0224 -0.0056]\n",
      " [-0.004  -0.0035  0.003   0.0039 -0.0134]\n",
      " [-0.0009 -0.0018  0.0146 -0.0068 -0.0006]]\n",
      "mean_state_value -0.031027495086231725\n",
      "episode 67/600\n",
      "p1 0.6544000000000001 p0 0.08639999999999999\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.047  -0.1532 -0.1559 -0.0434 -0.076 ]\n",
      " [-0.0265 -0.0427 -0.0699 -0.0568 -0.0223]\n",
      " [-0.0023 -0.0257 -0.0148 -0.0223 -0.0056]\n",
      " [-0.004  -0.0035  0.0031  0.0039 -0.0134]\n",
      " [-0.0009 -0.0017  0.0153 -0.0067 -0.0006]]\n",
      "mean_state_value -0.030922948418855185\n",
      "episode 68/600\n",
      "p1 0.6552 p0 0.08619999999999998\n",
      "trajectorySteps 18\n",
      "[[1 2 2 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0469 -0.1529 -0.1556 -0.0433 -0.0758]\n",
      " [-0.0264 -0.0426 -0.0697 -0.0567 -0.0223]\n",
      " [-0.0023 -0.0257 -0.0148 -0.0223 -0.0056]\n",
      " [-0.004  -0.0034  0.0038  0.0046 -0.0134]\n",
      " [-0.0009 -0.0017  0.0153 -0.0076 -0.0006]]\n",
      "mean_state_value -0.030826979572125112\n",
      "episode 69/600\n",
      "p1 0.656 p0 0.086\n",
      "trajectorySteps 31\n",
      "[[3 4 2 0 0]\n",
      " [4 1 1 1 1]\n",
      " [3 1 0 0 2]\n",
      " [1 1 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0468 -0.1526 -0.1559 -0.0432 -0.0756]\n",
      " [-0.0265 -0.0425 -0.0696 -0.0566 -0.0222]\n",
      " [-0.0024 -0.0265 -0.0147 -0.0222 -0.0056]\n",
      " [-0.0048 -0.0034  0.003   0.0046 -0.0133]\n",
      " [-0.0009 -0.0017  0.016  -0.0075 -0.0006]]\n",
      "mean_state_value -0.030859516465253135\n",
      "episode 70/600\n",
      "p1 0.6568 p0 0.08579999999999999\n",
      "trajectorySteps 20\n",
      "[[2 1 2 2 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0467 -0.1523 -0.1556 -0.0431 -0.0754]\n",
      " [-0.0264 -0.0424 -0.0694 -0.0564 -0.0222]\n",
      " [-0.0025 -0.0264 -0.0146 -0.0221 -0.0056]\n",
      " [-0.0048 -0.0034  0.0037  0.0046 -0.0133]\n",
      " [-0.0009 -0.0017  0.0167 -0.0075 -0.0006]]\n",
      "mean_state_value -0.03072988537928451\n",
      "episode 71/600\n",
      "p1 0.6576 p0 0.08559999999999998\n",
      "trajectorySteps 22\n",
      "[[0 1 1 2 3]\n",
      " [0 1 0 0 3]\n",
      " [1 1 0 1 4]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0466 -0.1519 -0.1553 -0.043  -0.0753]\n",
      " [-0.0263 -0.0423 -0.0693 -0.0563 -0.0221]\n",
      " [-0.0024 -0.0272 -0.0145 -0.0221 -0.0055]\n",
      " [-0.0048 -0.0034  0.0045  0.0053 -0.0141]\n",
      " [-0.0009 -0.0017  0.0168 -0.0075 -0.0006]]\n",
      "mean_state_value -0.030664126889653285\n",
      "episode 72/600\n",
      "p1 0.6584000000000001 p0 0.08539999999999999\n",
      "trajectorySteps 31\n",
      "[[0 3 1 1 1]\n",
      " [0 3 1 0 1]\n",
      " [1 1 0 0 1]\n",
      " [0 0 1 0 3]\n",
      " [0 1 4 3 5]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0464 -0.1517 -0.1549 -0.0429 -0.0751]\n",
      " [-0.0263 -0.0438 -0.0699 -0.0562 -0.0221]\n",
      " [-0.0024 -0.028  -0.0145 -0.022  -0.0055]\n",
      " [-0.0048 -0.0034  0.0037  0.0053 -0.0141]\n",
      " [-0.0009 -0.0017  0.0166 -0.0074 -0.0008]]\n",
      "mean_state_value -0.03076801481850582\n",
      "episode 73/600\n",
      "p1 0.6592 p0 0.08519999999999998\n",
      "trajectorySteps 27\n",
      "[[1 2 1 1 2]\n",
      " [2 0 0 1 4]\n",
      " [3 1 0 1 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0463 -0.1513 -0.1545 -0.0428 -0.075 ]\n",
      " [-0.0262 -0.0437 -0.0698 -0.056  -0.022 ]\n",
      " [-0.0025 -0.0279 -0.0144 -0.022  -0.0055]\n",
      " [-0.0048 -0.0034  0.0044  0.0053 -0.014 ]\n",
      " [-0.0009 -0.0017  0.0173 -0.0074 -0.0008]]\n",
      "mean_state_value -0.030637325717157812\n",
      "episode 74/600\n",
      "p1 0.66 p0 0.08499999999999999\n",
      "trajectorySteps 25\n",
      "[[1 2 3 5 4]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0462 -0.151  -0.1542 -0.0427 -0.0748]\n",
      " [-0.0262 -0.0436 -0.0696 -0.0559 -0.022 ]\n",
      " [-0.0025 -0.0278 -0.0143 -0.0219 -0.0055]\n",
      " [-0.0048 -0.0034  0.0036  0.006  -0.0148]\n",
      " [-0.0008 -0.0017  0.0173 -0.0073 -0.0008]]\n",
      "mean_state_value -0.03059711661184253\n",
      "episode 75/600\n",
      "p1 0.6608 p0 0.08479999999999999\n",
      "trajectorySteps 27\n",
      "[[3 1 1 1 1]\n",
      " [5 1 0 0 2]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0461 -0.1507 -0.1538 -0.0426 -0.0746]\n",
      " [-0.027  -0.0435 -0.0694 -0.0558 -0.022 ]\n",
      " [-0.0026 -0.0278 -0.0142 -0.0219 -0.0055]\n",
      " [-0.0047 -0.0034  0.0044  0.006  -0.0148]\n",
      " [-0.0008 -0.0017  0.018  -0.0073 -0.0007]]\n",
      "mean_state_value -0.030506138700946473\n",
      "episode 76/600\n",
      "p1 0.6616 p0 0.08459999999999998\n",
      "trajectorySteps 23\n",
      "[[1 1 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 3]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.046  -0.1503 -0.1535 -0.0425 -0.0744]\n",
      " [-0.0269 -0.0434 -0.0693 -0.0556 -0.0219]\n",
      " [-0.0026 -0.0277 -0.0142 -0.0218 -0.0056]\n",
      " [-0.0047 -0.0034  0.0044  0.0061 -0.0148]\n",
      " [-0.0008 -0.0017  0.0187 -0.0072 -0.0008]]\n",
      "mean_state_value -0.030404230763088534\n",
      "episode 77/600\n",
      "p1 0.6624000000000001 p0 0.08439999999999999\n",
      "trajectorySteps 28\n",
      "[[2 2 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 2 3]\n",
      " [0 0 1 5 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0459 -0.15   -0.1531 -0.0424 -0.0743]\n",
      " [-0.0269 -0.0433 -0.0691 -0.0555 -0.0219]\n",
      " [-0.0027 -0.0276 -0.0141 -0.0218 -0.0056]\n",
      " [-0.0047 -0.0034  0.0037  0.0068 -0.0148]\n",
      " [-0.0008 -0.0017  0.0188 -0.0089 -0.0008]]\n",
      "mean_state_value -0.030402037152524864\n",
      "episode 78/600\n",
      "p1 0.6632 p0 0.08419999999999998\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0458 -0.1497 -0.1528 -0.0423 -0.0741]\n",
      " [-0.0268 -0.0432 -0.0689 -0.0554 -0.0218]\n",
      " [-0.0027 -0.0276 -0.014  -0.0217 -0.0055]\n",
      " [-0.0047 -0.0034  0.0044  0.0068 -0.0148]\n",
      " [-0.0008 -0.0017  0.0195 -0.0088 -0.0008]]\n",
      "mean_state_value -0.030269809332316978\n",
      "episode 79/600\n",
      "p1 0.664 p0 0.08399999999999999\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 1 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0457 -0.1493 -0.1524 -0.0422 -0.0739]\n",
      " [-0.0276 -0.0431 -0.0688 -0.0552 -0.0218]\n",
      " [-0.0027 -0.0275 -0.0139 -0.0217 -0.0055]\n",
      " [-0.0047 -0.0034  0.0037  0.0068 -0.0148]\n",
      " [-0.0008 -0.0017  0.0202 -0.0087 -0.0008]]\n",
      "mean_state_value -0.03022505215530681\n",
      "episode 80/600\n",
      "p1 0.6648000000000001 p0 0.08379999999999999\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 3]\n",
      " [2 0 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0456 -0.1489 -0.152  -0.0421 -0.0738]\n",
      " [-0.0276 -0.043  -0.0686 -0.0551 -0.0217]\n",
      " [-0.0027 -0.0274 -0.0139 -0.0216 -0.0055]\n",
      " [-0.0047 -0.0033  0.0044  0.0068 -0.0147]\n",
      " [-0.0008 -0.0017  0.0209 -0.0087 -0.0009]]\n",
      "mean_state_value -0.03009423916757594\n",
      "episode 81/600\n",
      "p1 0.6656 p0 0.08359999999999998\n",
      "trajectorySteps 21\n",
      "[[1 1 2 3 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0455 -0.1486 -0.1517 -0.042  -0.0736]\n",
      " [-0.0275 -0.0429 -0.0685 -0.055  -0.0217]\n",
      " [-0.0027 -0.0274 -0.0138 -0.0216 -0.0055]\n",
      " [-0.0047 -0.0033  0.0052  0.0068 -0.0148]\n",
      " [-0.0008 -0.0017  0.0216 -0.0087 -0.0009]]\n",
      "mean_state_value -0.029964988655193862\n",
      "episode 82/600\n",
      "p1 0.6664000000000001 p0 0.08339999999999999\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 2]\n",
      " [2 0 0 0 2]\n",
      " [1 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0454 -0.1482 -0.1514 -0.0419 -0.0734]\n",
      " [-0.0274 -0.0428 -0.0683 -0.0549 -0.0216]\n",
      " [-0.0027 -0.0273 -0.0137 -0.0215 -0.0056]\n",
      " [-0.0047 -0.0033  0.0059  0.0069 -0.0147]\n",
      " [-0.0008 -0.0017  0.0224 -0.0088 -0.0009]]\n",
      "mean_state_value -0.029835144528833055\n",
      "episode 83/600\n",
      "p1 0.6672 p0 0.08319999999999998\n",
      "trajectorySteps 25\n",
      "[[6 2 2 1 2]\n",
      " [4 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0453 -0.1479 -0.151  -0.0418 -0.0733]\n",
      " [-0.0274 -0.0427 -0.0681 -0.0547 -0.0216]\n",
      " [-0.0027 -0.0272 -0.0136 -0.0215 -0.0055]\n",
      " [-0.0046 -0.0033  0.0052  0.0069 -0.0147]\n",
      " [-0.0008 -0.0016  0.0231 -0.0087 -0.0009]]\n",
      "mean_state_value -0.029760942634042706\n",
      "episode 84/600\n",
      "p1 0.668 p0 0.08299999999999999\n",
      "trajectorySteps 24\n",
      "[[2 1 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [2 0 0 1 2]\n",
      " [0 0 1 1 2]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0452 -0.1475 -0.1506 -0.0417 -0.0732]\n",
      " [-0.0273 -0.0426 -0.068  -0.0546 -0.0215]\n",
      " [-0.0028 -0.0272 -0.0136 -0.0214 -0.0055]\n",
      " [-0.0046 -0.0033  0.0059  0.0069 -0.0147]\n",
      " [-0.0008 -0.0016  0.0238 -0.0095 -0.0009]]\n",
      "mean_state_value -0.02965876561641475\n",
      "episode 85/600\n",
      "p1 0.6688000000000001 p0 0.08279999999999998\n",
      "trajectorySteps 23\n",
      "[[2 1 1 1 3]\n",
      " [2 0 0 0 3]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0452 -0.1472 -0.1503 -0.0416 -0.073 ]\n",
      " [-0.0273 -0.0425 -0.0678 -0.0545 -0.0215]\n",
      " [-0.0028 -0.0271 -0.0135 -0.0214 -0.0055]\n",
      " [-0.0046 -0.0033  0.0052  0.0069 -0.0146]\n",
      " [-0.0008 -0.0016  0.0245 -0.0094 -0.0009]]\n",
      "mean_state_value -0.02959095101544443\n",
      "episode 86/600\n",
      "p1 0.6696 p0 0.08259999999999998\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [1 0 0 1 2]\n",
      " [0 0 1 2 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.045  -0.1468 -0.1499 -0.0415 -0.0728]\n",
      " [-0.0273 -0.0424 -0.0676 -0.0543 -0.0214]\n",
      " [-0.0028 -0.027  -0.0134 -0.0213 -0.0055]\n",
      " [-0.0046 -0.0033  0.0059  0.0068 -0.0154]\n",
      " [-0.0008 -0.0016  0.0246 -0.0094 -0.0009]]\n",
      "mean_state_value -0.029518104743458937\n",
      "episode 87/600\n",
      "p1 0.6704000000000001 p0 0.08239999999999999\n",
      "trajectorySteps 18\n",
      "[[3 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.045  -0.1465 -0.1495 -0.0414 -0.0727]\n",
      " [-0.0272 -0.0423 -0.0675 -0.0542 -0.0214]\n",
      " [-0.0028 -0.027  -0.0133 -0.0213 -0.0055]\n",
      " [-0.0046 -0.0033  0.0067  0.0068 -0.0154]\n",
      " [-0.0008 -0.0016  0.0253 -0.0093 -0.0009]]\n",
      "mean_state_value -0.02938219236022908\n",
      "episode 88/600\n",
      "p1 0.6712 p0 0.08219999999999998\n",
      "trajectorySteps 49\n",
      "[[4 4 2 3 3]\n",
      " [4 1 0 1 1]\n",
      " [5 1 0 1 2]\n",
      " [3 0 1 0 2]\n",
      " [1 1 3 2 4]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.045  -0.1468 -0.1492 -0.0413 -0.0725]\n",
      " [-0.0272 -0.0422 -0.0673 -0.0541 -0.0213]\n",
      " [-0.0028 -0.0269 -0.0133 -0.0212 -0.0055]\n",
      " [-0.0046 -0.0033  0.006   0.0069 -0.0153]\n",
      " [-0.0008 -0.0016  0.0252 -0.0092 -0.001 ]]\n",
      "mean_state_value -0.029376147231840472\n",
      "episode 89/600\n",
      "p1 0.672 p0 0.08199999999999999\n",
      "trajectorySteps 27\n",
      "[[1 4 3 1 1]\n",
      " [2 1 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0449 -0.1472 -0.1488 -0.0412 -0.0723]\n",
      " [-0.0271 -0.0421 -0.0671 -0.0539 -0.0213]\n",
      " [-0.0028 -0.0268 -0.0132 -0.0212 -0.0055]\n",
      " [-0.0046 -0.0033  0.0061  0.0069 -0.0153]\n",
      " [-0.0008 -0.0016  0.0259 -0.0091 -0.001 ]]\n",
      "mean_state_value -0.029290271212252996\n",
      "episode 90/600\n",
      "p1 0.6728000000000001 p0 0.08179999999999998\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0448 -0.1468 -0.1485 -0.0411 -0.0722]\n",
      " [-0.0271 -0.042  -0.067  -0.0538 -0.0212]\n",
      " [-0.0029 -0.0268 -0.0131 -0.0211 -0.0055]\n",
      " [-0.0046 -0.0033  0.0068  0.0069 -0.0152]\n",
      " [-0.0008 -0.0016  0.0267 -0.0091 -0.001 ]]\n",
      "mean_state_value -0.0291538295656405\n",
      "episode 91/600\n",
      "p1 0.6736 p0 0.08159999999999998\n",
      "trajectorySteps 32\n",
      "[[3 3 3 3 3]\n",
      " [1 1 2 1 3]\n",
      " [3 1 0 0 1]\n",
      " [1 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0447 -0.1465 -0.1481 -0.041  -0.072 ]\n",
      " [-0.027  -0.0419 -0.0684 -0.0544 -0.0212]\n",
      " [-0.0029 -0.0267 -0.013  -0.0211 -0.0054]\n",
      " [-0.0046 -0.0033  0.0076  0.0076 -0.016 ]\n",
      " [-0.0008 -0.0016  0.0267 -0.009  -0.001 ]]\n",
      "mean_state_value -0.029145297078969973\n",
      "episode 92/600\n",
      "p1 0.6744000000000001 p0 0.08139999999999999\n",
      "trajectorySteps 32\n",
      "[[2 2 3 3 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 1 3 4 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0446 -0.1461 -0.1478 -0.0409 -0.0718]\n",
      " [-0.0269 -0.0418 -0.0682 -0.0543 -0.0211]\n",
      " [-0.003  -0.0267 -0.013  -0.021  -0.0056]\n",
      " [-0.0045 -0.0033  0.0077  0.0076 -0.016 ]\n",
      " [-0.0008 -0.0016  0.0267 -0.009  -0.001 ]]\n",
      "mean_state_value -0.029076367498356813\n",
      "episode 93/600\n",
      "p1 0.6752 p0 0.08119999999999998\n",
      "trajectorySteps 30\n",
      "[[3 2 1 1 1]\n",
      " [2 0 0 3 4]\n",
      " [2 0 0 0 1]\n",
      " [3 0 1 0 2]\n",
      " [1 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0446 -0.1457 -0.1474 -0.0408 -0.0716]\n",
      " [-0.027  -0.0417 -0.0681 -0.0542 -0.0211]\n",
      " [-0.003  -0.0266 -0.0129 -0.021  -0.0056]\n",
      " [-0.0045 -0.0032  0.0085  0.0077 -0.0159]\n",
      " [-0.0008 -0.0016  0.0274 -0.0089 -0.001 ]]\n",
      "mean_state_value -0.028942017413498028\n",
      "episode 94/600\n",
      "p1 0.676 p0 0.08099999999999999\n",
      "trajectorySteps 15\n",
      "[[0 1 1 1 1]\n",
      " [0 1 0 0 1]\n",
      " [1 1 0 0 2]\n",
      " [0 0 1 1 3]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0445 -0.1454 -0.1471 -0.0407 -0.0715]\n",
      " [-0.0269 -0.0416 -0.0679 -0.054  -0.021 ]\n",
      " [-0.0029 -0.0273 -0.0128 -0.0209 -0.0056]\n",
      " [-0.0045 -0.0032  0.0092  0.0084 -0.0168]\n",
      " [-0.0008 -0.0016  0.0275 -0.0089 -0.001 ]]\n",
      "mean_state_value -0.028868515466877236\n",
      "episode 95/600\n",
      "p1 0.6768000000000001 p0 0.08079999999999998\n",
      "trajectorySteps 15\n",
      "[[1 2 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 1 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0444 -0.1451 -0.1467 -0.0406 -0.0713]\n",
      " [-0.0268 -0.0415 -0.0677 -0.0539 -0.021 ]\n",
      " [-0.003  -0.0272 -0.0127 -0.0217 -0.0055]\n",
      " [-0.0045 -0.0032  0.01    0.0091 -0.0167]\n",
      " [-0.0008 -0.0016  0.0276 -0.0088 -0.001 ]]\n",
      "mean_state_value -0.0287649031847396\n",
      "episode 96/600\n",
      "p1 0.6776 p0 0.08059999999999998\n",
      "trajectorySteps 24\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [3 0 0 0 2]\n",
      " [0 0 1 0 3]\n",
      " [0 0 2 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0442 -0.1447 -0.1464 -0.0405 -0.0711]\n",
      " [-0.0268 -0.0414 -0.0676 -0.0538 -0.0209]\n",
      " [-0.0031 -0.0272 -0.0126 -0.0216 -0.0055]\n",
      " [-0.0045 -0.0032  0.0093  0.0091 -0.0168]\n",
      " [-0.0008 -0.0016  0.0283 -0.0087 -0.0011]]\n",
      "mean_state_value -0.028690695863815324\n",
      "episode 97/600\n",
      "p1 0.6784000000000001 p0 0.08039999999999999\n",
      "trajectorySteps 23\n",
      "[[1 2 1 1 1]\n",
      " [1 2 0 1 3]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0441 -0.145  -0.146  -0.0404 -0.0709]\n",
      " [-0.0267 -0.0421 -0.0674 -0.0536 -0.0208]\n",
      " [-0.0031 -0.0271 -0.0126 -0.0216 -0.0055]\n",
      " [-0.0045 -0.0032  0.0101  0.0092 -0.0167]\n",
      " [-0.0008 -0.0015  0.0283 -0.0087 -0.0011]]\n",
      "mean_state_value -0.028638757066150516\n",
      "episode 98/600\n",
      "p1 0.6792 p0 0.08019999999999998\n",
      "trajectorySteps 23\n",
      "[[2 1 2 1 1]\n",
      " [3 0 0 0 1]\n",
      " [2 2 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.044  -0.1447 -0.1456 -0.0403 -0.0708]\n",
      " [-0.0267 -0.042  -0.0672 -0.0535 -0.0208]\n",
      " [-0.0031 -0.027  -0.0125 -0.0215 -0.0055]\n",
      " [-0.0045 -0.0032  0.0108  0.0092 -0.0167]\n",
      " [-0.0008 -0.0015  0.029  -0.0086 -0.0011]]\n",
      "mean_state_value -0.02850307598395143\n",
      "episode 99/600\n",
      "p1 0.68 p0 0.07999999999999999\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0439 -0.1443 -0.1453 -0.0402 -0.0706]\n",
      " [-0.0266 -0.0418 -0.0671 -0.0534 -0.0207]\n",
      " [-0.0031 -0.027  -0.0124 -0.0214 -0.0055]\n",
      " [-0.0045 -0.0032  0.0116  0.0092 -0.0166]\n",
      " [-0.0008 -0.0015  0.0297 -0.0085 -0.0011]]\n",
      "mean_state_value -0.028362565137301142\n",
      "episode 100/600\n",
      "p1 0.6808000000000001 p0 0.07979999999999998\n",
      "trajectorySteps 29\n",
      "[[4 3 1 1 0]\n",
      " [0 2 0 1 2]\n",
      " [3 1 0 1 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0439 -0.1446 -0.1449 -0.0401 -0.0704]\n",
      " [-0.0266 -0.0417 -0.0669 -0.0532 -0.0208]\n",
      " [-0.0031 -0.0277 -0.0123 -0.0214 -0.0055]\n",
      " [-0.0045 -0.0032  0.0124  0.0092 -0.0166]\n",
      " [-0.0008 -0.0015  0.0305 -0.0084 -0.0011]]\n",
      "mean_state_value -0.02828347651200564\n",
      "episode 101/600\n",
      "p1 0.6816 p0 0.07959999999999998\n",
      "trajectorySteps 16\n",
      "[[1 1 1 2 4]\n",
      " [1 0 0 0 1]\n",
      " [1 0 1 1 1]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0438 -0.1443 -0.1445 -0.04   -0.0703]\n",
      " [-0.0265 -0.0416 -0.0667 -0.0531 -0.0207]\n",
      " [-0.0031 -0.0276 -0.0116 -0.0221 -0.0055]\n",
      " [-0.0044 -0.0032  0.0132  0.0093 -0.0166]\n",
      " [-0.0008 -0.0015  0.0305 -0.0084 -0.0011]]\n",
      "mean_state_value -0.0281763340360839\n",
      "episode 102/600\n",
      "p1 0.6824000000000001 p0 0.07939999999999998\n",
      "trajectorySteps 24\n",
      "[[4 3 2 2 3]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0438 -0.1439 -0.1442 -0.0399 -0.0702]\n",
      " [-0.0264 -0.0415 -0.0666 -0.053  -0.0206]\n",
      " [-0.0031 -0.0275 -0.0115 -0.0221 -0.0054]\n",
      " [-0.0044 -0.0032  0.014   0.01   -0.0165]\n",
      " [-0.0008 -0.0015  0.0306 -0.0091 -0.0011]]\n",
      "mean_state_value -0.028072887346564417\n",
      "episode 103/600\n",
      "p1 0.6832 p0 0.07919999999999998\n",
      "trajectorySteps 19\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [3 1 0 1 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0437 -0.1436 -0.1438 -0.0398 -0.07  ]\n",
      " [-0.0271 -0.0414 -0.0664 -0.0528 -0.0206]\n",
      " [-0.0032 -0.0275 -0.0114 -0.022  -0.0054]\n",
      " [-0.0044 -0.0032  0.0133  0.01   -0.0165]\n",
      " [-0.0008 -0.0015  0.0314 -0.009  -0.0011]]\n",
      "mean_state_value -0.028022403800094024\n",
      "episode 104/600\n",
      "p1 0.684 p0 0.07899999999999999\n",
      "trajectorySteps 28\n",
      "[[1 1 1 1 1]\n",
      " [1 0 1 1 1]\n",
      " [2 0 0 0 3]\n",
      " [1 0 1 0 5]\n",
      " [0 0 1 1 5]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0436 -0.1432 -0.1441 -0.0397 -0.0699]\n",
      " [-0.0271 -0.0413 -0.0662 -0.0527 -0.0205]\n",
      " [-0.0032 -0.0274 -0.0113 -0.022  -0.0054]\n",
      " [-0.0044 -0.0032  0.0141  0.01   -0.0165]\n",
      " [-0.0008 -0.0015  0.0322 -0.009  -0.0012]]\n",
      "mean_state_value -0.027915954997830873\n",
      "episode 105/600\n",
      "p1 0.6848000000000001 p0 0.07879999999999998\n",
      "trajectorySteps 53\n",
      "[[3 5 6 4 2]\n",
      " [4 0 0 0 2]\n",
      " [3 1 0 0 3]\n",
      " [1 2 1 0 2]\n",
      " [1 3 4 4 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0435 -0.1429 -0.1438 -0.0396 -0.0697]\n",
      " [-0.027  -0.0412 -0.0661 -0.0526 -0.0205]\n",
      " [-0.0032 -0.0273 -0.0113 -0.0219 -0.0055]\n",
      " [-0.0052 -0.0039  0.0148  0.0101 -0.0165]\n",
      " [-0.0008 -0.0023  0.0321 -0.0089 -0.0012]]\n",
      "mean_state_value -0.027909711247023673\n",
      "episode 106/600\n",
      "p1 0.6856 p0 0.07859999999999998\n",
      "trajectorySteps 31\n",
      "[[1 2 5 4 2]\n",
      " [1 0 0 0 2]\n",
      " [3 1 0 0 4]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0433 -0.1425 -0.1435 -0.0395 -0.0695]\n",
      " [-0.0269 -0.0411 -0.0659 -0.0524 -0.0204]\n",
      " [-0.0032 -0.0273 -0.0112 -0.0218 -0.0056]\n",
      " [-0.0052 -0.0039  0.0156  0.0101 -0.0164]\n",
      " [-0.0008 -0.0023  0.0328 -0.0088 -0.0012]]\n",
      "mean_state_value -0.02777482871551562\n",
      "episode 107/600\n",
      "p1 0.6864000000000001 p0 0.07839999999999998\n",
      "trajectorySteps 34\n",
      "[[0 1 4 4 7]\n",
      " [1 1 0 0 3]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 4 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0432 -0.1422 -0.1432 -0.0394 -0.0694]\n",
      " [-0.0276 -0.041  -0.0657 -0.0523 -0.0204]\n",
      " [-0.0032 -0.0272 -0.0111 -0.0218 -0.0056]\n",
      " [-0.0052 -0.0039  0.0164  0.0101 -0.0164]\n",
      " [-0.0008 -0.0023  0.0328 -0.0087 -0.0012]]\n",
      "mean_state_value -0.027696038060309017\n",
      "episode 108/600\n",
      "p1 0.6872 p0 0.07819999999999998\n",
      "trajectorySteps 20\n",
      "[[3 3 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0432 -0.1418 -0.1435 -0.0393 -0.0693]\n",
      " [-0.0276 -0.0409 -0.0656 -0.0522 -0.0203]\n",
      " [-0.0031 -0.0271 -0.011  -0.0217 -0.0056]\n",
      " [-0.0051 -0.0039  0.0172  0.0101 -0.0163]\n",
      " [-0.0008 -0.0023  0.0335 -0.0086 -0.0012]]\n",
      "mean_state_value -0.027582013094375895\n",
      "episode 109/600\n",
      "p1 0.6880000000000001 p0 0.07799999999999999\n",
      "trajectorySteps 21\n",
      "[[3 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0431 -0.1415 -0.1431 -0.0392 -0.0691]\n",
      " [-0.0275 -0.0408 -0.0654 -0.052  -0.0203]\n",
      " [-0.0031 -0.0271 -0.011  -0.0217 -0.0056]\n",
      " [-0.0051 -0.0039  0.018   0.0102 -0.0163]\n",
      " [-0.0008 -0.0023  0.0343 -0.0086 -0.0012]]\n",
      "mean_state_value -0.027440603878211043\n",
      "episode 110/600\n",
      "p1 0.6888000000000001 p0 0.07779999999999998\n",
      "trajectorySteps 21\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 1 3]\n",
      " [2 0 0 0 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0431 -0.1411 -0.1427 -0.0391 -0.0689]\n",
      " [-0.0274 -0.0407 -0.0652 -0.0519 -0.0203]\n",
      " [-0.0031 -0.027  -0.0109 -0.0216 -0.0056]\n",
      " [-0.0051 -0.0039  0.0173  0.0102 -0.0163]\n",
      " [-0.0008 -0.0023  0.0351 -0.0085 -0.0012]]\n",
      "mean_state_value -0.02736093651893635\n",
      "episode 111/600\n",
      "p1 0.6896 p0 0.07759999999999997\n",
      "trajectorySteps 23\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 4]\n",
      " [0 0 1 0 4]\n",
      " [0 1 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.043  -0.1407 -0.1424 -0.039  -0.0687]\n",
      " [-0.0273 -0.0406 -0.0651 -0.0518 -0.0202]\n",
      " [-0.0031 -0.0269 -0.0108 -0.0216 -0.0058]\n",
      " [-0.0051 -0.0039  0.0181  0.0102 -0.0162]\n",
      " [-0.0008 -0.0022  0.0351 -0.0085 -0.0012]]\n",
      "mean_state_value -0.02725321641580174\n",
      "episode 112/600\n",
      "p1 0.6904000000000001 p0 0.07739999999999998\n",
      "trajectorySteps 29\n",
      "[[5 2 1 1 2]\n",
      " [2 1 0 0 1]\n",
      " [4 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 4 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.043  -0.1411 -0.142  -0.0389 -0.0686]\n",
      " [-0.0273 -0.0405 -0.0649 -0.0516 -0.0202]\n",
      " [-0.0031 -0.0269 -0.0107 -0.0215 -0.0057]\n",
      " [-0.0051 -0.0039  0.0189  0.0103 -0.0162]\n",
      " [-0.0008 -0.0022  0.0359 -0.0086 -0.0012]]\n",
      "mean_state_value -0.027153147444652283\n",
      "episode 113/600\n",
      "p1 0.6912 p0 0.07719999999999998\n",
      "trajectorySteps 19\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 1 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0429 -0.1407 -0.1416 -0.0388 -0.0684]\n",
      " [-0.0272 -0.0404 -0.0647 -0.0515 -0.0201]\n",
      " [-0.0031 -0.0268 -0.0106 -0.0215 -0.0057]\n",
      " [-0.0051 -0.0039  0.0198  0.0103 -0.0162]\n",
      " [-0.0008 -0.0022  0.0366 -0.0085 -0.0012]]\n",
      "mean_state_value -0.02701053533962589\n",
      "episode 114/600\n",
      "p1 0.6920000000000001 p0 0.07699999999999999\n",
      "trajectorySteps 18\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 2]\n",
      " [2 0 0 1 1]\n",
      " [1 0 1 2 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0428 -0.1403 -0.1419 -0.0387 -0.0683]\n",
      " [-0.0271 -0.0403 -0.0646 -0.0514 -0.0201]\n",
      " [-0.0031 -0.0267 -0.0106 -0.0222 -0.0057]\n",
      " [-0.0051 -0.0038  0.0206  0.011  -0.017 ]\n",
      " [-0.0008 -0.0022  0.0367 -0.0085 -0.0012]]\n",
      "mean_state_value -0.026957819599494757\n",
      "episode 115/600\n",
      "p1 0.6928000000000001 p0 0.07679999999999998\n",
      "trajectorySteps 25\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 4]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 3 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0427 -0.14   -0.1416 -0.0386 -0.0681]\n",
      " [-0.0271 -0.0402 -0.0644 -0.0512 -0.0201]\n",
      " [-0.0032 -0.0266 -0.0105 -0.0221 -0.0057]\n",
      " [-0.0051 -0.0038  0.0207  0.0111 -0.0169]\n",
      " [-0.0008 -0.0022  0.0375 -0.0085 -0.0012]]\n",
      "mean_state_value -0.02684454390327544\n",
      "episode 116/600\n",
      "p1 0.6936 p0 0.07659999999999997\n",
      "trajectorySteps 24\n",
      "[[1 1 5 2 1]\n",
      " [1 0 0 0 4]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0426 -0.1396 -0.1413 -0.0385 -0.0679]\n",
      " [-0.027  -0.0401 -0.0642 -0.0511 -0.0201]\n",
      " [-0.0032 -0.0266 -0.0104 -0.022  -0.0057]\n",
      " [-0.005  -0.0038  0.0215  0.0111 -0.0169]\n",
      " [-0.0008 -0.0022  0.0383 -0.0084 -0.0011]]\n",
      "mean_state_value -0.026703649895916746\n",
      "episode 117/600\n",
      "p1 0.6944000000000001 p0 0.07639999999999998\n",
      "trajectorySteps 25\n",
      "[[1 2 2 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 5]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0425 -0.1392 -0.1409 -0.0384 -0.0677]\n",
      " [-0.0269 -0.04   -0.064  -0.051  -0.02  ]\n",
      " [-0.0031 -0.0265 -0.0103 -0.022  -0.0057]\n",
      " [-0.005  -0.0038  0.0208  0.0111 -0.0169]\n",
      " [-0.0008 -0.0022  0.0391 -0.0083 -0.0013]]\n",
      "mean_state_value -0.026622631794304793\n",
      "episode 118/600\n",
      "p1 0.6952 p0 0.07619999999999998\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0424 -0.1389 -0.1405 -0.0383 -0.0675]\n",
      " [-0.0269 -0.0399 -0.0639 -0.0508 -0.02  ]\n",
      " [-0.0032 -0.0264 -0.0103 -0.0219 -0.0056]\n",
      " [-0.005  -0.0038  0.0216  0.0111 -0.0168]\n",
      " [-0.0008 -0.0021  0.0399 -0.0083 -0.0014]]\n",
      "mean_state_value -0.026481651962123213\n",
      "episode 119/600\n",
      "p1 0.6960000000000001 p0 0.07599999999999998\n",
      "trajectorySteps 22\n",
      "[[1 1 3 1 0]\n",
      " [2 0 0 1 3]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0423 -0.1385 -0.1402 -0.0382 -0.0674]\n",
      " [-0.0269 -0.0398 -0.0637 -0.0507 -0.02  ]\n",
      " [-0.0032 -0.0264 -0.0102 -0.0219 -0.0056]\n",
      " [-0.005  -0.0038  0.0225  0.0112 -0.0168]\n",
      " [-0.0008 -0.0021  0.0407 -0.0082 -0.0014]]\n",
      "mean_state_value -0.026341553958367462\n",
      "episode 120/600\n",
      "p1 0.6968000000000001 p0 0.07579999999999998\n",
      "trajectorySteps 22\n",
      "[[1 2 2 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0421 -0.1382 -0.1398 -0.0382 -0.0672]\n",
      " [-0.0268 -0.0397 -0.0635 -0.0506 -0.0199]\n",
      " [-0.0032 -0.0263 -0.0101 -0.0218 -0.0056]\n",
      " [-0.005  -0.0038  0.0233  0.0112 -0.0168]\n",
      " [-0.0008 -0.0021  0.0415 -0.0081 -0.0014]]\n",
      "mean_state_value -0.026203602959420643\n",
      "episode 121/600\n",
      "p1 0.6976 p0 0.07559999999999997\n",
      "trajectorySteps 22\n",
      "[[0 1 2 0 0]\n",
      " [0 1 1 1 2]\n",
      " [3 1 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 3 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.042  -0.1378 -0.1401 -0.0381 -0.067 ]\n",
      " [-0.0267 -0.0395 -0.0634 -0.0504 -0.0199]\n",
      " [-0.0032 -0.027  -0.01   -0.0218 -0.0056]\n",
      " [-0.005  -0.0038  0.0241  0.0112 -0.0168]\n",
      " [-0.0008 -0.0021  0.0423 -0.0081 -0.0014]]\n",
      "mean_state_value -0.026117951685330234\n",
      "episode 122/600\n",
      "p1 0.6984000000000001 p0 0.07539999999999998\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 1 3]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0419 -0.1374 -0.1398 -0.038  -0.0669]\n",
      " [-0.0266 -0.0394 -0.0632 -0.0503 -0.0198]\n",
      " [-0.0032 -0.0269 -0.01   -0.0217 -0.0056]\n",
      " [-0.005  -0.0038  0.0249  0.012  -0.0175]\n",
      " [-0.0008 -0.0021  0.0424 -0.0081 -0.0014]]\n",
      "mean_state_value -0.02600335166604217\n",
      "episode 123/600\n",
      "p1 0.6992 p0 0.07519999999999998\n",
      "trajectorySteps 18\n",
      "[[1 2 2 2 1]\n",
      " [1 1 0 0 2]\n",
      " [2 0 0 1 1]\n",
      " [0 0 1 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0418 -0.1377 -0.1394 -0.0379 -0.0667]\n",
      " [-0.0266 -0.0393 -0.063  -0.0502 -0.0198]\n",
      " [-0.0032 -0.0268 -0.0099 -0.0224 -0.0056]\n",
      " [-0.005  -0.0038  0.0257  0.0127 -0.0175]\n",
      " [-0.0008 -0.0021  0.0425 -0.008  -0.0014]]\n",
      "mean_state_value -0.025914088512211927\n",
      "episode 124/600\n",
      "p1 0.7000000000000001 p0 0.07499999999999998\n",
      "trajectorySteps 33\n",
      "[[5 3 3 3 2]\n",
      " [3 1 0 1 3]\n",
      " [4 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0418 -0.138  -0.139  -0.0378 -0.0665]\n",
      " [-0.0265 -0.0392 -0.0629 -0.05   -0.0198]\n",
      " [-0.0033 -0.0268 -0.0098 -0.0223 -0.0056]\n",
      " [-0.0049 -0.0037  0.0265  0.0135 -0.0182]\n",
      " [-0.0007 -0.0021  0.0426 -0.008  -0.0014]]\n",
      "mean_state_value -0.02583234612763794\n",
      "episode 125/600\n",
      "p1 0.7008000000000001 p0 0.07479999999999998\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [2 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0417 -0.1376 -0.1386 -0.0377 -0.0663]\n",
      " [-0.0272 -0.0399 -0.0627 -0.0499 -0.0197]\n",
      " [-0.0033 -0.0267 -0.0097 -0.0223 -0.0056]\n",
      " [-0.0049 -0.0037  0.0274  0.0135 -0.0181]\n",
      " [-0.0007 -0.0021  0.0434 -0.0079 -0.0014]]\n",
      "mean_state_value -0.025744634462060764\n",
      "episode 126/600\n",
      "p1 0.7016 p0 0.07459999999999997\n",
      "trajectorySteps 20\n",
      "[[1 3 4 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0416 -0.1373 -0.1383 -0.0376 -0.0662]\n",
      " [-0.0271 -0.0397 -0.0625 -0.0498 -0.0197]\n",
      " [-0.0033 -0.0266 -0.0097 -0.0222 -0.0056]\n",
      " [-0.0049 -0.0037  0.0275  0.0136 -0.0181]\n",
      " [-0.0007 -0.0021  0.0442 -0.0078 -0.0014]]\n",
      "mean_state_value -0.025623704935137055\n",
      "episode 127/600\n",
      "p1 0.7024000000000001 p0 0.07439999999999998\n",
      "trajectorySteps 28\n",
      "[[2 1 3 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [3 0 1 0 1]\n",
      " [1 0 2 2 4]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0414 -0.1369 -0.138  -0.0375 -0.066 ]\n",
      " [-0.027  -0.0396 -0.0624 -0.0496 -0.0196]\n",
      " [-0.0033 -0.0265 -0.0096 -0.0221 -0.0056]\n",
      " [-0.005  -0.0037  0.0283  0.0136 -0.018 ]\n",
      " [-0.0007 -0.0021  0.0449 -0.0077 -0.0015]]\n",
      "mean_state_value -0.025489336953822286\n",
      "episode 128/600\n",
      "p1 0.7032 p0 0.07419999999999997\n",
      "trajectorySteps 23\n",
      "[[1 4 2 2 4]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0413 -0.1366 -0.1376 -0.0374 -0.0659]\n",
      " [-0.0269 -0.0395 -0.0622 -0.0495 -0.0196]\n",
      " [-0.0033 -0.0265 -0.0095 -0.0221 -0.0056]\n",
      " [-0.005  -0.0037  0.0284  0.0136 -0.018 ]\n",
      " [-0.0007 -0.0021  0.0457 -0.0076 -0.0015]]\n",
      "mean_state_value -0.025377489943689907\n",
      "episode 129/600\n",
      "p1 0.7040000000000001 p0 0.07399999999999998\n",
      "trajectorySteps 25\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 5]\n",
      " [3 0 0 0 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0412 -0.1363 -0.1372 -0.0373 -0.0657]\n",
      " [-0.0269 -0.0394 -0.062  -0.0494 -0.0196]\n",
      " [-0.0034 -0.0264 -0.0094 -0.022  -0.0056]\n",
      " [-0.0049 -0.0037  0.0293  0.0136 -0.0179]\n",
      " [-0.0007 -0.0021  0.0465 -0.0076 -0.0015]]\n",
      "mean_state_value -0.02523811510397459\n",
      "episode 130/600\n",
      "p1 0.7048000000000001 p0 0.07379999999999998\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [4 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0411 -0.1359 -0.1369 -0.0372 -0.0655]\n",
      " [-0.0268 -0.0393 -0.0619 -0.0492 -0.0196]\n",
      " [-0.0035 -0.0263 -0.0093 -0.022  -0.0056]\n",
      " [-0.0049 -0.0037  0.0294  0.0144 -0.0186]\n",
      " [-0.0007 -0.0021  0.0466 -0.0075 -0.0015]]\n",
      "mean_state_value -0.025149011693378284\n",
      "episode 131/600\n",
      "p1 0.7056 p0 0.07359999999999997\n",
      "trajectorySteps 32\n",
      "[[0 1 3 4 2]\n",
      " [1 1 0 4 2]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 3]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.041  -0.1355 -0.1365 -0.0372 -0.0654]\n",
      " [-0.0274 -0.0392 -0.0617 -0.0491 -0.0195]\n",
      " [-0.0035 -0.0263 -0.0093 -0.0219 -0.0056]\n",
      " [-0.0049 -0.0037  0.0288  0.0144 -0.0186]\n",
      " [-0.0007 -0.0021  0.0474 -0.0074 -0.0015]]\n",
      "mean_state_value -0.02509131185811442\n",
      "episode 132/600\n",
      "p1 0.7064000000000001 p0 0.07339999999999998\n",
      "trajectorySteps 40\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 0 5]\n",
      " [3 0 0 4 9]\n",
      " [0 0 1 1 3]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0409 -0.1358 -0.1361 -0.0371 -0.0652]\n",
      " [-0.0274 -0.0391 -0.0615 -0.049  -0.0195]\n",
      " [-0.0036 -0.0262 -0.0092 -0.0218 -0.0056]\n",
      " [-0.0049 -0.0037  0.0296  0.0145 -0.0186]\n",
      " [-0.0007 -0.002   0.0482 -0.008  -0.0016]]\n",
      "mean_state_value -0.025006145380728206\n",
      "episode 133/600\n",
      "p1 0.7072 p0 0.07319999999999997\n",
      "trajectorySteps 24\n",
      "[[1 1 1 1 0]\n",
      " [1 0 2 3 4]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0408 -0.1354 -0.1358 -0.037  -0.065 ]\n",
      " [-0.0273 -0.039  -0.0614 -0.0502 -0.0195]\n",
      " [-0.0036 -0.0261 -0.0091 -0.0218 -0.0056]\n",
      " [-0.0049 -0.0037  0.0305  0.0145 -0.0185]\n",
      " [-0.0007 -0.002   0.049  -0.0079 -0.0016]]\n",
      "mean_state_value -0.02491890609179551\n",
      "episode 134/600\n",
      "p1 0.7080000000000001 p0 0.07299999999999998\n",
      "trajectorySteps 19\n",
      "[[1 2 1 1 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0407 -0.1351 -0.1354 -0.0369 -0.0649]\n",
      " [-0.0273 -0.0389 -0.0612 -0.0501 -0.0195]\n",
      " [-0.0036 -0.026  -0.009  -0.0217 -0.0056]\n",
      " [-0.0049 -0.0036  0.0313  0.0145 -0.0185]\n",
      " [-0.0007 -0.002   0.0498 -0.0078 -0.0016]]\n",
      "mean_state_value -0.024774942654729844\n",
      "episode 135/600\n",
      "p1 0.7088000000000001 p0 0.07279999999999998\n",
      "trajectorySteps 18\n",
      "[[2 3 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0406 -0.1347 -0.1356 -0.0368 -0.0647]\n",
      " [-0.0272 -0.0388 -0.061  -0.0499 -0.0194]\n",
      " [-0.0036 -0.026  -0.009  -0.0217 -0.0056]\n",
      " [-0.0049 -0.0036  0.0321  0.0146 -0.0184]\n",
      " [-0.0007 -0.002   0.0506 -0.0078 -0.0016]]\n",
      "mean_state_value -0.02465381340938273\n",
      "episode 136/600\n",
      "p1 0.7096 p0 0.07259999999999997\n",
      "trajectorySteps 24\n",
      "[[2 4 3 2 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0405 -0.1344 -0.1353 -0.0367 -0.0645]\n",
      " [-0.0271 -0.0387 -0.0609 -0.0498 -0.0194]\n",
      " [-0.0036 -0.0259 -0.0089 -0.0216 -0.0056]\n",
      " [-0.0049 -0.0036  0.0315  0.0146 -0.0184]\n",
      " [-0.0007 -0.002   0.0514 -0.0077 -0.0016]]\n",
      "mean_state_value -0.024564078646912332\n",
      "episode 137/600\n",
      "p1 0.7104000000000001 p0 0.07239999999999998\n",
      "trajectorySteps 10\n",
      "[[1 1 1 0 0]\n",
      " [2 0 1 0 0]\n",
      " [2 0 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0404 -0.134  -0.1355 -0.0366 -0.0643]\n",
      " [-0.0271 -0.0386 -0.0614 -0.0496 -0.0193]\n",
      " [-0.0037 -0.0258 -0.0081 -0.0215 -0.0056]\n",
      " [-0.0048 -0.0036  0.0324  0.0146 -0.0183]\n",
      " [-0.0007 -0.002   0.0515 -0.0077 -0.0016]]\n",
      "mean_state_value -0.02447205731645171\n",
      "episode 138/600\n",
      "p1 0.7112 p0 0.07219999999999997\n",
      "trajectorySteps 31\n",
      "[[1 4 1 1 1]\n",
      " [1 2 0 0 1]\n",
      " [2 1 0 0 3]\n",
      " [0 0 1 0 6]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0403 -0.1349 -0.1351 -0.0365 -0.0641]\n",
      " [-0.0271 -0.0385 -0.0612 -0.0495 -0.0193]\n",
      " [-0.0037 -0.0258 -0.008  -0.0215 -0.0056]\n",
      " [-0.0048 -0.0036  0.0332  0.0147 -0.0183]\n",
      " [-0.0007 -0.002   0.0523 -0.0076 -0.0016]]\n",
      "mean_state_value -0.02437291719534052\n",
      "episode 139/600\n",
      "p1 0.7120000000000001 p0 0.07199999999999998\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0402 -0.1345 -0.1348 -0.0364 -0.064 ]\n",
      " [-0.027  -0.0384 -0.0611 -0.0494 -0.0192]\n",
      " [-0.0037 -0.0257 -0.0079 -0.0214 -0.0055]\n",
      " [-0.0048 -0.0036  0.0341  0.0147 -0.0182]\n",
      " [-0.0007 -0.002   0.0532 -0.0075 -0.0016]]\n",
      "mean_state_value -0.024225332107940322\n",
      "episode 140/600\n",
      "p1 0.7128000000000001 p0 0.07179999999999997\n",
      "trajectorySteps 23\n",
      "[[4 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 1 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0401 -0.1342 -0.1344 -0.0363 -0.0638]\n",
      " [-0.0269 -0.0383 -0.0609 -0.0492 -0.0191]\n",
      " [-0.0038 -0.0256 -0.0078 -0.0214 -0.0055]\n",
      " [-0.0048 -0.0036  0.0349  0.0147 -0.0182]\n",
      " [-0.0007 -0.002   0.054  -0.0075 -0.0016]]\n",
      "mean_state_value -0.024084374146989912\n",
      "episode 141/600\n",
      "p1 0.7136 p0 0.07159999999999997\n",
      "trajectorySteps 4\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.04   -0.1338 -0.134  -0.0362 -0.0636]\n",
      " [-0.0268 -0.0381 -0.0607 -0.0491 -0.0191]\n",
      " [-0.0038 -0.0263 -0.007  -0.0213 -0.0055]\n",
      " [-0.0048 -0.0036  0.0351  0.0148 -0.0181]\n",
      " [-0.0007 -0.002   0.0541 -0.0075 -0.0016]]\n",
      "mean_state_value -0.02398952761415731\n",
      "episode 142/600\n",
      "p1 0.7144000000000001 p0 0.07139999999999998\n",
      "trajectorySteps 20\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0399 -0.1334 -0.1337 -0.0361 -0.0634]\n",
      " [-0.0268 -0.038  -0.0605 -0.049  -0.019 ]\n",
      " [-0.0038 -0.0262 -0.0069 -0.0212 -0.0055]\n",
      " [-0.0048 -0.0036  0.0345  0.0148 -0.0181]\n",
      " [-0.0007 -0.002   0.0549 -0.0074 -0.0016]]\n",
      "mean_state_value -0.023896661141748633\n",
      "episode 143/600\n",
      "p1 0.7152000000000001 p0 0.07119999999999997\n",
      "trajectorySteps 21\n",
      "[[1 1 1 3 3]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0398 -0.1331 -0.1333 -0.036  -0.0633]\n",
      " [-0.0267 -0.0379 -0.0604 -0.0488 -0.019 ]\n",
      " [-0.0038 -0.0261 -0.0068 -0.0212 -0.0055]\n",
      " [-0.0048 -0.0036  0.0346  0.0148 -0.018 ]\n",
      " [-0.0007 -0.002   0.0557 -0.0073 -0.0016]]\n",
      "mean_state_value -0.023774428829317937\n",
      "episode 144/600\n",
      "p1 0.7160000000000001 p0 0.07099999999999998\n",
      "trajectorySteps 25\n",
      "[[1 2 2 2 1]\n",
      " [2 2 0 0 2]\n",
      " [1 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0397 -0.1327 -0.1329 -0.0359 -0.0631]\n",
      " [-0.0273 -0.0385 -0.0602 -0.0487 -0.0189]\n",
      " [-0.0038 -0.026  -0.0068 -0.0211 -0.0055]\n",
      " [-0.0047 -0.0035  0.0355  0.0148 -0.018 ]\n",
      " [-0.0007 -0.002   0.0566 -0.0072 -0.0018]]\n",
      "mean_state_value -0.02368527711965894\n",
      "episode 145/600\n",
      "p1 0.7168000000000001 p0 0.07079999999999997\n",
      "trajectorySteps 28\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 1 2]\n",
      " [0 0 1 0 3]\n",
      " [0 0 2 3 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0396 -0.1323 -0.1325 -0.0358 -0.063 ]\n",
      " [-0.0272 -0.0384 -0.06   -0.0485 -0.0189]\n",
      " [-0.0038 -0.026  -0.0067 -0.0211 -0.0055]\n",
      " [-0.0047 -0.0035  0.0349  0.0149 -0.018 ]\n",
      " [-0.0007 -0.002   0.0574 -0.0071 -0.0017]]\n",
      "mean_state_value -0.02359187639883504\n",
      "episode 146/600\n",
      "p1 0.7176 p0 0.07059999999999997\n",
      "trajectorySteps 17\n",
      "[[3 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0395 -0.1319 -0.1322 -0.0357 -0.0628]\n",
      " [-0.0271 -0.0383 -0.0599 -0.0484 -0.0188]\n",
      " [-0.0038 -0.0259 -0.0066 -0.021  -0.0055]\n",
      " [-0.0047 -0.0035  0.0344  0.0149 -0.0179]\n",
      " [-0.0007 -0.002   0.0582 -0.007  -0.0017]]\n",
      "mean_state_value -0.02350108956287521\n",
      "episode 147/600\n",
      "p1 0.7184000000000001 p0 0.07039999999999998\n",
      "trajectorySteps 22\n",
      "[[2 4 3 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 1 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0395 -0.1316 -0.1318 -0.0356 -0.0626]\n",
      " [-0.0271 -0.0382 -0.0597 -0.0483 -0.0188]\n",
      " [-0.0038 -0.0258 -0.0065 -0.021  -0.0055]\n",
      " [-0.0047 -0.0035  0.0352  0.0149 -0.0179]\n",
      " [-0.0007 -0.0019  0.059  -0.0069 -0.0017]]\n",
      "mean_state_value -0.023351807052228238\n",
      "episode 148/600\n",
      "p1 0.7192000000000001 p0 0.07019999999999997\n",
      "trajectorySteps 21\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 1 1 2]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0394 -0.1312 -0.1314 -0.0355 -0.0624]\n",
      " [-0.027  -0.0381 -0.0595 -0.0481 -0.0187]\n",
      " [-0.0038 -0.0257 -0.0057 -0.0216 -0.0055]\n",
      " [-0.0047 -0.0035  0.0361  0.015  -0.0186]\n",
      " [-0.0007 -0.0019  0.0592 -0.0068 -0.0017]]\n",
      "mean_state_value -0.02325913555739513\n",
      "episode 149/600\n",
      "p1 0.7200000000000001 p0 0.06999999999999998\n",
      "trajectorySteps 26\n",
      "[[2 1 1 2 1]\n",
      " [3 0 0 0 2]\n",
      " [5 1 0 0 1]\n",
      " [0 0 1 2 2]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0393 -0.1308 -0.131  -0.0354 -0.0622]\n",
      " [-0.0269 -0.038  -0.0594 -0.048  -0.0187]\n",
      " [-0.0039 -0.0257 -0.0056 -0.0215 -0.0055]\n",
      " [-0.0047 -0.0035  0.0362  0.0151 -0.0186]\n",
      " [-0.0007 -0.0019  0.0593 -0.0075 -0.0017]]\n",
      "mean_state_value -0.023196495855969536\n",
      "episode 150/600\n",
      "p1 0.7208000000000001 p0 0.06979999999999997\n",
      "trajectorySteps 26\n",
      "[[2 1 0 0 0]\n",
      " [2 1 1 2 2]\n",
      " [3 0 0 1 4]\n",
      " [0 0 1 0 3]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0391 -0.1311 -0.1307 -0.0353 -0.0621]\n",
      " [-0.0268 -0.0385 -0.0592 -0.0479 -0.0186]\n",
      " [-0.0039 -0.0256 -0.0055 -0.0215 -0.0055]\n",
      " [-0.0047 -0.0035  0.0371  0.0151 -0.0185]\n",
      " [-0.0007 -0.0019  0.0601 -0.0074 -0.0017]]\n",
      "mean_state_value -0.02309344355393052\n",
      "episode 151/600\n",
      "p1 0.7216 p0 0.06959999999999997\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [2 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.039  -0.1307 -0.1303 -0.0352 -0.0619]\n",
      " [-0.0267 -0.0384 -0.059  -0.0477 -0.0186]\n",
      " [-0.0039 -0.0255 -0.0054 -0.0214 -0.0055]\n",
      " [-0.0047 -0.0035  0.0372  0.0159 -0.0185]\n",
      " [-0.0007 -0.0019  0.0602 -0.008  -0.0017]]\n",
      "mean_state_value -0.02299558214933231\n",
      "episode 152/600\n",
      "p1 0.7224000000000002 p0 0.06939999999999998\n",
      "trajectorySteps 24\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 1 6]\n",
      " [2 0 0 1 4]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0389 -0.1304 -0.1299 -0.0351 -0.0617]\n",
      " [-0.0267 -0.0383 -0.0589 -0.0476 -0.0185]\n",
      " [-0.0038 -0.0254 -0.0054 -0.0213 -0.0055]\n",
      " [-0.0046 -0.0035  0.0381  0.0167 -0.0191]\n",
      " [-0.0007 -0.0019  0.0603 -0.008  -0.0017]]\n",
      "mean_state_value -0.022870303469108344\n",
      "episode 153/600\n",
      "p1 0.7232000000000001 p0 0.06919999999999997\n",
      "trajectorySteps 22\n",
      "[[1 2 1 2 1]\n",
      " [1 2 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 1 3 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0388 -0.1306 -0.1295 -0.0351 -0.0615]\n",
      " [-0.0266 -0.0389 -0.0587 -0.0475 -0.0185]\n",
      " [-0.0038 -0.0254 -0.0053 -0.0213 -0.0055]\n",
      " [-0.0046 -0.0035  0.039   0.0167 -0.0191]\n",
      " [-0.0007 -0.0018  0.0604 -0.0079 -0.0017]]\n",
      "mean_state_value -0.02279704876332391\n",
      "episode 154/600\n",
      "p1 0.7240000000000001 p0 0.06899999999999998\n",
      "trajectorySteps 16\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0387 -0.1302 -0.1292 -0.035  -0.0614]\n",
      " [-0.0265 -0.0388 -0.0585 -0.0473 -0.0184]\n",
      " [-0.0038 -0.0253 -0.0052 -0.0212 -0.0054]\n",
      " [-0.0046 -0.0034  0.0398  0.0168 -0.0191]\n",
      " [-0.0007 -0.0018  0.0613 -0.0078 -0.0017]]\n",
      "mean_state_value -0.02264328220654905\n",
      "episode 155/600\n",
      "p1 0.7248000000000001 p0 0.06879999999999997\n",
      "trajectorySteps 34\n",
      "[[2 3 1 4 4]\n",
      " [1 0 0 2 2]\n",
      " [4 2 0 1 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0386 -0.1299 -0.1288 -0.0349 -0.0612]\n",
      " [-0.0264 -0.0387 -0.0583 -0.0472 -0.0183]\n",
      " [-0.0038 -0.0252 -0.0051 -0.0211 -0.0054]\n",
      " [-0.0046 -0.0034  0.0393  0.0168 -0.019 ]\n",
      " [-0.0007 -0.0018  0.0621 -0.0077 -0.0017]]\n",
      "mean_state_value -0.022543957741997828\n",
      "episode 156/600\n",
      "p1 0.7256 p0 0.06859999999999997\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [1 1 3 2 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0385 -0.1295 -0.1284 -0.0347 -0.061 ]\n",
      " [-0.027  -0.0392 -0.0588 -0.0477 -0.0183]\n",
      " [-0.0038 -0.0252 -0.005  -0.0211 -0.0054]\n",
      " [-0.0046 -0.0034  0.0402  0.0168 -0.0189]\n",
      " [-0.0007 -0.0018  0.0629 -0.0076 -0.0016]]\n",
      "mean_state_value -0.022495758225651655\n",
      "episode 157/600\n",
      "p1 0.7264000000000002 p0 0.06839999999999997\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [2 2 1 0 0]\n",
      " [0 1 1 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0384 -0.1291 -0.128  -0.0346 -0.0608]\n",
      " [-0.0269 -0.0391 -0.0587 -0.0475 -0.0182]\n",
      " [-0.0039 -0.0251 -0.005  -0.021  -0.0054]\n",
      " [-0.0053 -0.0048  0.0397  0.0169 -0.0189]\n",
      " [-0.0007 -0.0017  0.0637 -0.0075 -0.0016]]\n",
      "mean_state_value -0.022482705381434688\n",
      "episode 158/600\n",
      "p1 0.7272000000000001 p0 0.06819999999999997\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 3]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0383 -0.1287 -0.1277 -0.0345 -0.0606]\n",
      " [-0.0269 -0.039  -0.0585 -0.0474 -0.0182]\n",
      " [-0.004  -0.025  -0.0049 -0.021  -0.0054]\n",
      " [-0.0052 -0.0048  0.0391  0.0169 -0.0188]\n",
      " [-0.0007 -0.0017  0.0646 -0.0074 -0.0016]]\n",
      "mean_state_value -0.022384695108795573\n",
      "episode 159/600\n",
      "p1 0.7280000000000001 p0 0.06799999999999998\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0381 -0.1283 -0.1273 -0.0344 -0.0605]\n",
      " [-0.0268 -0.0389 -0.0583 -0.0473 -0.0181]\n",
      " [-0.004  -0.0249 -0.0048 -0.0209 -0.0054]\n",
      " [-0.0052 -0.0048  0.04    0.0169 -0.0188]\n",
      " [-0.0007 -0.0017  0.0654 -0.0073 -0.0016]]\n",
      "mean_state_value -0.022228015543552075\n",
      "episode 160/600\n",
      "p1 0.7288000000000001 p0 0.06779999999999997\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.038  -0.128  -0.1269 -0.0343 -0.0603]\n",
      " [-0.0267 -0.0388 -0.0582 -0.0471 -0.0181]\n",
      " [-0.004  -0.0249 -0.0047 -0.0208 -0.0053]\n",
      " [-0.0052 -0.0047  0.0395  0.017  -0.0187]\n",
      " [-0.0007 -0.0017  0.0663 -0.0072 -0.0016]]\n",
      "mean_state_value -0.022127047596544713\n",
      "episode 161/600\n",
      "p1 0.7296 p0 0.06759999999999997\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0379 -0.1276 -0.1265 -0.0342 -0.0601]\n",
      " [-0.0266 -0.0386 -0.058  -0.047  -0.018 ]\n",
      " [-0.0039 -0.0248 -0.0046 -0.0208 -0.0053]\n",
      " [-0.0052 -0.0047  0.0404  0.017  -0.0187]\n",
      " [-0.0007 -0.0017  0.0671 -0.0071 -0.0016]]\n",
      "mean_state_value -0.02197266684053759\n",
      "episode 162/600\n",
      "p1 0.7304000000000002 p0 0.06739999999999997\n",
      "trajectorySteps 4\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0378 -0.1272 -0.1262 -0.0341 -0.0599]\n",
      " [-0.0266 -0.0385 -0.0578 -0.0468 -0.018 ]\n",
      " [-0.0039 -0.0254 -0.0038 -0.0207 -0.0053]\n",
      " [-0.0052 -0.0047  0.0413  0.017  -0.0186]\n",
      " [-0.0007 -0.0017  0.0672 -0.0071 -0.0016]]\n",
      "mean_state_value -0.021842828531892487\n",
      "episode 163/600\n",
      "p1 0.7312000000000001 p0 0.06719999999999997\n",
      "trajectorySteps 15\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0377 -0.1268 -0.1264 -0.034  -0.0598]\n",
      " [-0.0265 -0.0384 -0.0576 -0.0467 -0.0179]\n",
      " [-0.004  -0.0253 -0.0037 -0.0207 -0.0053]\n",
      " [-0.0052 -0.0047  0.0422  0.0171 -0.0186]\n",
      " [-0.0007 -0.0017  0.0681 -0.007  -0.0016]]\n",
      "mean_state_value -0.02171108446556025\n",
      "episode 164/600\n",
      "p1 0.7320000000000001 p0 0.06699999999999998\n",
      "trajectorySteps 20\n",
      "[[1 1 2 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0376 -0.1265 -0.126  -0.0339 -0.0596]\n",
      " [-0.0264 -0.0383 -0.0575 -0.0466 -0.0179]\n",
      " [-0.004  -0.0252 -0.0036 -0.0206 -0.0053]\n",
      " [-0.0051 -0.0047  0.0416  0.0171 -0.0185]\n",
      " [-0.0007 -0.0016  0.0683 -0.0068 -0.0016]]\n",
      "mean_state_value -0.021637333621261606\n",
      "episode 165/600\n",
      "p1 0.7328000000000001 p0 0.06679999999999997\n",
      "trajectorySteps 14\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0375 -0.1261 -0.1257 -0.0339 -0.0594]\n",
      " [-0.0263 -0.0382 -0.0573 -0.0464 -0.0179]\n",
      " [-0.004  -0.0252 -0.0035 -0.0205 -0.0053]\n",
      " [-0.0051 -0.0047  0.0425  0.0179 -0.0191]\n",
      " [-0.0007 -0.0016  0.0684 -0.0068 -0.0016]]\n",
      "mean_state_value -0.02151004006030057\n",
      "episode 166/600\n",
      "p1 0.7336 p0 0.06659999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0374 -0.1257 -0.1253 -0.0338 -0.0593]\n",
      " [-0.0262 -0.0381 -0.0571 -0.0463 -0.0178]\n",
      " [-0.0041 -0.0251 -0.0035 -0.0205 -0.0052]\n",
      " [-0.0051 -0.0047  0.0427  0.018  -0.019 ]\n",
      " [-0.0007 -0.0016  0.0693 -0.0068 -0.0016]]\n",
      "mean_state_value -0.021387175023942454\n",
      "episode 167/600\n",
      "p1 0.7344000000000002 p0 0.06639999999999997\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 3]\n",
      " [2 1 0 0 1]\n",
      " [1 1 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0372 -0.1253 -0.1249 -0.0337 -0.0591]\n",
      " [-0.0262 -0.038  -0.057  -0.0461 -0.0178]\n",
      " [-0.0041 -0.025  -0.0034 -0.0204 -0.0052]\n",
      " [-0.0058 -0.0046  0.0422  0.0188 -0.0196]\n",
      " [-0.0007 -0.0016  0.0694 -0.0067 -0.0016]]\n",
      "mean_state_value -0.02133936289563919\n",
      "episode 168/600\n",
      "p1 0.7352000000000001 p0 0.06619999999999997\n",
      "trajectorySteps 21\n",
      "[[3 1 1 1 2]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0372 -0.125  -0.1245 -0.0336 -0.0589]\n",
      " [-0.0261 -0.0378 -0.0568 -0.046  -0.0178]\n",
      " [-0.004  -0.0249 -0.0033 -0.0203 -0.0052]\n",
      " [-0.0057 -0.0046  0.0431  0.0188 -0.0196]\n",
      " [-0.0007 -0.0016  0.0703 -0.0066 -0.0016]]\n",
      "mean_state_value -0.021185692009224055\n",
      "episode 169/600\n",
      "p1 0.7360000000000001 p0 0.06599999999999998\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0371 -0.1246 -0.1241 -0.0335 -0.0588]\n",
      " [-0.026  -0.0377 -0.0566 -0.0459 -0.0177]\n",
      " [-0.0041 -0.0249 -0.0032 -0.0203 -0.0052]\n",
      " [-0.0057 -0.0046  0.0426  0.0189 -0.0195]\n",
      " [-0.0007 -0.0016  0.0711 -0.0065 -0.0016]]\n",
      "mean_state_value -0.021084722809498104\n",
      "episode 170/600\n",
      "p1 0.7368000000000001 p0 0.06579999999999997\n",
      "trajectorySteps 22\n",
      "[[3 3 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.037  -0.1242 -0.1238 -0.0334 -0.0586]\n",
      " [-0.0259 -0.0376 -0.0564 -0.0457 -0.0176]\n",
      " [-0.0041 -0.0248 -0.0031 -0.0202 -0.0052]\n",
      " [-0.0057 -0.0046  0.0435  0.0189 -0.0195]\n",
      " [-0.0007 -0.0015  0.072  -0.0063 -0.0016]]\n",
      "mean_state_value -0.02092945302818638\n",
      "episode 171/600\n",
      "p1 0.7376 p0 0.06559999999999996\n",
      "trajectorySteps 19\n",
      "[[1 2 2 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0369 -0.1239 -0.1234 -0.0333 -0.0584]\n",
      " [-0.0258 -0.0375 -0.0563 -0.0456 -0.0176]\n",
      " [-0.0041 -0.0247 -0.0031 -0.0202 -0.0052]\n",
      " [-0.0057 -0.0046  0.0444  0.0197 -0.0194]\n",
      " [-0.0007 -0.0015  0.0721 -0.007  -0.0016]]\n",
      "mean_state_value -0.02079915026007043\n",
      "episode 172/600\n",
      "p1 0.7384000000000002 p0 0.06539999999999997\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0368 -0.1235 -0.123  -0.0332 -0.0582]\n",
      " [-0.0258 -0.0374 -0.0561 -0.0455 -0.0175]\n",
      " [-0.0041 -0.0246 -0.003  -0.0201 -0.0052]\n",
      " [-0.0057 -0.0046  0.0453  0.0198 -0.0193]\n",
      " [-0.0007 -0.0015  0.0729 -0.0069 -0.0016]]\n",
      "mean_state_value -0.02064420946668751\n",
      "episode 173/600\n",
      "p1 0.7392000000000001 p0 0.06519999999999997\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [1 1 1 1 1]\n",
      " [2 2 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0367 -0.1231 -0.1226 -0.0331 -0.058 ]\n",
      " [-0.0263 -0.0379 -0.0559 -0.0453 -0.0175]\n",
      " [-0.0041 -0.0246 -0.0029 -0.02   -0.0051]\n",
      " [-0.0057 -0.0046  0.0462  0.0198 -0.0193]\n",
      " [-0.0007 -0.0015  0.0738 -0.0068 -0.0016]]\n",
      "mean_state_value -0.020534921312947367\n",
      "episode 174/600\n",
      "p1 0.7400000000000001 p0 0.06499999999999997\n",
      "trajectorySteps 21\n",
      "[[2 4 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 3 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0366 -0.1228 -0.1223 -0.033  -0.0579]\n",
      " [-0.0262 -0.0378 -0.0558 -0.0452 -0.0174]\n",
      " [-0.0041 -0.0245 -0.0028 -0.02   -0.0051]\n",
      " [-0.0056 -0.0045  0.0471  0.0198 -0.0192]\n",
      " [-0.0006 -0.0015  0.0746 -0.0068 -0.0015]]\n",
      "mean_state_value -0.02038141320876293\n",
      "episode 175/600\n",
      "p1 0.7408000000000001 p0 0.06479999999999997\n",
      "trajectorySteps 23\n",
      "[[2 2 1 2 4]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 1 2]\n",
      " [0 0 0 0 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0365 -0.1224 -0.1219 -0.0329 -0.0577]\n",
      " [-0.0262 -0.0377 -0.0556 -0.045  -0.0174]\n",
      " [-0.0041 -0.0244 -0.0027 -0.0199 -0.0051]\n",
      " [-0.0056 -0.0045  0.0481  0.0207 -0.0198]\n",
      " [-0.0006 -0.0015  0.0748 -0.0067 -0.0015]]\n",
      "mean_state_value -0.020248515088876724\n",
      "episode 176/600\n",
      "p1 0.7416 p0 0.06459999999999996\n",
      "trajectorySteps 29\n",
      "[[2 3 2 2 2]\n",
      " [2 0 0 0 3]\n",
      " [4 1 0 0 2]\n",
      " [1 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0363 -0.122  -0.1215 -0.0328 -0.0575]\n",
      " [-0.0261 -0.0376 -0.0554 -0.0449 -0.0173]\n",
      " [-0.0041 -0.0243 -0.0027 -0.0199 -0.0051]\n",
      " [-0.0056 -0.0045  0.0476  0.0215 -0.0197]\n",
      " [-0.0006 -0.0015  0.0749 -0.0073 -0.0015]]\n",
      "mean_state_value -0.02016924923849631\n",
      "episode 177/600\n",
      "p1 0.7424000000000002 p0 0.06439999999999997\n",
      "trajectorySteps 20\n",
      "[[0 1 1 1 2]\n",
      " [1 1 0 0 3]\n",
      " [1 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0362 -0.1216 -0.1211 -0.0327 -0.0574]\n",
      " [-0.0266 -0.0374 -0.0552 -0.0448 -0.0173]\n",
      " [-0.0041 -0.0242 -0.0026 -0.0198 -0.0051]\n",
      " [-0.0056 -0.0045  0.0478  0.0215 -0.0197]\n",
      " [-0.0006 -0.0015  0.0758 -0.0072 -0.0015]]\n",
      "mean_state_value -0.02006487646311543\n",
      "episode 178/600\n",
      "p1 0.7432000000000001 p0 0.06419999999999997\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 3]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 3 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0361 -0.1212 -0.1208 -0.0326 -0.0572]\n",
      " [-0.0265 -0.0373 -0.0551 -0.0446 -0.0173]\n",
      " [-0.0041 -0.0242 -0.0025 -0.0197 -0.0051]\n",
      " [-0.0056 -0.0045  0.0487  0.0216 -0.0196]\n",
      " [-0.0006 -0.0015  0.0767 -0.0071 -0.0015]]\n",
      "mean_state_value -0.01990810819400286\n",
      "episode 179/600\n",
      "p1 0.7440000000000001 p0 0.06399999999999997\n",
      "trajectorySteps 23\n",
      "[[2 3 4 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.036  -0.1209 -0.1204 -0.0325 -0.057 ]\n",
      " [-0.0265 -0.0372 -0.0549 -0.0445 -0.0172]\n",
      " [-0.0041 -0.0241 -0.0024 -0.0197 -0.0051]\n",
      " [-0.0055 -0.0045  0.0496  0.0216 -0.0195]\n",
      " [-0.0006 -0.0015  0.0775 -0.0069 -0.0015]]\n",
      "mean_state_value -0.019752200804518318\n",
      "episode 180/600\n",
      "p1 0.7448000000000001 p0 0.06379999999999997\n",
      "trajectorySteps 18\n",
      "[[3 1 1 2 1]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.036  -0.1205 -0.1201 -0.0323 -0.0568]\n",
      " [-0.0264 -0.0371 -0.0547 -0.0443 -0.0172]\n",
      " [-0.0041 -0.024  -0.0023 -0.0196 -0.0051]\n",
      " [-0.0055 -0.0045  0.0505  0.0225 -0.0201]\n",
      " [-0.0006 -0.0015  0.0777 -0.0069 -0.0015]]\n",
      "mean_state_value -0.019619053928922096\n",
      "episode 181/600\n",
      "p1 0.7456 p0 0.06359999999999996\n",
      "trajectorySteps 20\n",
      "[[2 1 1 0 0]\n",
      " [2 1 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0359 -0.1201 -0.1202 -0.0322 -0.0566]\n",
      " [-0.027  -0.037  -0.0546 -0.0442 -0.0171]\n",
      " [-0.0041 -0.0239 -0.0023 -0.0196 -0.0051]\n",
      " [-0.0055 -0.0044  0.0514  0.0233 -0.0201]\n",
      " [-0.0006 -0.0015  0.0778 -0.0074 -0.0015]]\n",
      "mean_state_value -0.01953354977732702\n",
      "episode 182/600\n",
      "p1 0.7464000000000002 p0 0.06339999999999997\n",
      "trajectorySteps 21\n",
      "[[1 1 2 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0358 -0.1197 -0.1199 -0.0321 -0.0565]\n",
      " [-0.0269 -0.0369 -0.0544 -0.0441 -0.017 ]\n",
      " [-0.0041 -0.0239 -0.0022 -0.0195 -0.0051]\n",
      " [-0.0055 -0.0044  0.0523  0.0233 -0.02  ]\n",
      " [-0.0006 -0.0015  0.0787 -0.0079 -0.0015]]\n",
      "mean_state_value -0.019396287031527525\n",
      "episode 183/600\n",
      "p1 0.7472000000000001 p0 0.06319999999999996\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 1]\n",
      " [3 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0357 -0.1194 -0.1195 -0.032  -0.0563]\n",
      " [-0.0268 -0.0367 -0.0542 -0.0439 -0.017 ]\n",
      " [-0.0041 -0.0238 -0.0021 -0.0194 -0.005 ]\n",
      " [-0.0055 -0.0044  0.0533  0.0234 -0.02  ]\n",
      " [-0.0006 -0.0015  0.0795 -0.0078 -0.0014]]\n",
      "mean_state_value -0.019241191852733056\n",
      "episode 184/600\n",
      "p1 0.7480000000000001 p0 0.06299999999999997\n",
      "trajectorySteps 25\n",
      "[[3 1 1 1 2]\n",
      " [1 0 0 0 3]\n",
      " [3 0 0 0 1]\n",
      " [3 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0356 -0.119  -0.1191 -0.0319 -0.0561]\n",
      " [-0.0268 -0.0366 -0.054  -0.0438 -0.017 ]\n",
      " [-0.0041 -0.0237 -0.002  -0.0194 -0.005 ]\n",
      " [-0.0055 -0.0044  0.0535  0.0234 -0.0199]\n",
      " [-0.0006 -0.0014  0.0804 -0.0077 -0.0014]]\n",
      "mean_state_value -0.01911509680205625\n",
      "episode 185/600\n",
      "p1 0.7488000000000001 p0 0.06279999999999997\n",
      "trajectorySteps 27\n",
      "[[1 3 3 1 3]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 1 3]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0355 -0.1186 -0.1188 -0.0318 -0.0559]\n",
      " [-0.0267 -0.0365 -0.0539 -0.0436 -0.0169]\n",
      " [-0.0041 -0.0236 -0.0019 -0.0193 -0.005 ]\n",
      " [-0.0054 -0.0044  0.0544  0.0235 -0.0198]\n",
      " [-0.0006 -0.0014  0.0813 -0.0076 -0.0014]]\n",
      "mean_state_value -0.01896021076241228\n",
      "episode 186/600\n",
      "p1 0.7496 p0 0.06259999999999996\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0354 -0.1183 -0.1184 -0.0317 -0.0558]\n",
      " [-0.0267 -0.0364 -0.0537 -0.0435 -0.0169]\n",
      " [-0.0041 -0.0236 -0.0019 -0.0192 -0.005 ]\n",
      " [-0.0054 -0.0044  0.0553  0.0235 -0.0198]\n",
      " [-0.0006 -0.0014  0.0822 -0.0075 -0.0014]]\n",
      "mean_state_value -0.018803900985883415\n",
      "episode 187/600\n",
      "p1 0.7504000000000002 p0 0.06239999999999997\n",
      "trajectorySteps 30\n",
      "[[2 3 1 0 1]\n",
      " [1 0 1 2 4]\n",
      " [2 1 0 1 4]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0353 -0.1179 -0.1185 -0.0316 -0.0556]\n",
      " [-0.0266 -0.0363 -0.0535 -0.0434 -0.0168]\n",
      " [-0.0041 -0.0235 -0.0018 -0.0192 -0.005 ]\n",
      " [-0.0054 -0.0044  0.0562  0.0235 -0.0197]\n",
      " [-0.0006 -0.0014  0.083  -0.0074 -0.0015]]\n",
      "mean_state_value -0.018668857280456478\n",
      "episode 188/600\n",
      "p1 0.7512000000000001 p0 0.062199999999999964\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [1 2 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0352 -0.1175 -0.1182 -0.0315 -0.0554]\n",
      " [-0.0265 -0.0362 -0.0534 -0.0432 -0.0168]\n",
      " [-0.0041 -0.0234 -0.0017 -0.0191 -0.005 ]\n",
      " [-0.006  -0.005   0.0572  0.0236 -0.0196]\n",
      " [-0.0006 -0.0014  0.0839 -0.0072 -0.0015]]\n",
      "mean_state_value -0.01855749805485348\n",
      "episode 189/600\n",
      "p1 0.7520000000000001 p0 0.061999999999999965\n",
      "trajectorySteps 22\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 3]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0351 -0.1172 -0.1178 -0.0314 -0.0553]\n",
      " [-0.0264 -0.036  -0.0532 -0.0431 -0.0167]\n",
      " [-0.0041 -0.0233 -0.0016 -0.0191 -0.0049]\n",
      " [-0.006  -0.005   0.0581  0.0236 -0.0196]\n",
      " [-0.0006 -0.0014  0.0848 -0.0071 -0.0016]]\n",
      "mean_state_value -0.018399002216762897\n",
      "episode 190/600\n",
      "p1 0.7528000000000001 p0 0.061799999999999966\n",
      "trajectorySteps 28\n",
      "[[1 1 1 1 2]\n",
      " [1 0 1 2 4]\n",
      " [1 0 0 2 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.035  -0.1168 -0.1179 -0.0313 -0.0551]\n",
      " [-0.0263 -0.0359 -0.053  -0.043  -0.0167]\n",
      " [-0.0041 -0.0233 -0.0015 -0.019  -0.0049]\n",
      " [-0.006  -0.0049  0.059   0.0237 -0.0196]\n",
      " [-0.0006 -0.0014  0.0857 -0.0068 -0.0015]]\n",
      "mean_state_value -0.01825485730114363\n",
      "episode 191/600\n",
      "p1 0.7536000000000002 p0 0.06159999999999997\n",
      "trajectorySteps 18\n",
      "[[1 3 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0349 -0.1164 -0.1176 -0.0312 -0.0549]\n",
      " [-0.0262 -0.0358 -0.0528 -0.0428 -0.0167]\n",
      " [-0.0041 -0.0232 -0.0015 -0.0189 -0.0049]\n",
      " [-0.006  -0.0049  0.06    0.0237 -0.0195]\n",
      " [-0.0006 -0.0013  0.0859 -0.0067 -0.0015]]\n",
      "mean_state_value -0.018111330016499335\n",
      "episode 192/600\n",
      "p1 0.7544000000000002 p0 0.06139999999999997\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [4 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 4 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0347 -0.116  -0.1172 -0.0311 -0.0547]\n",
      " [-0.0261 -0.0357 -0.0527 -0.0427 -0.0166]\n",
      " [-0.004  -0.0231 -0.0014 -0.0189 -0.0049]\n",
      " [-0.0059 -0.0049  0.0609  0.0237 -0.0195]\n",
      " [-0.0006 -0.0013  0.0868 -0.0066 -0.0015]]\n",
      "mean_state_value -0.017948774797125193\n",
      "episode 193/600\n",
      "p1 0.7552000000000001 p0 0.06119999999999996\n",
      "trajectorySteps 21\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0347 -0.1156 -0.1168 -0.031  -0.0546]\n",
      " [-0.0261 -0.0356 -0.0525 -0.0425 -0.0165]\n",
      " [-0.004  -0.023  -0.0013 -0.0188 -0.0049]\n",
      " [-0.0059 -0.0049  0.0618  0.0238 -0.0194]\n",
      " [-0.0006 -0.0013  0.0877 -0.0064 -0.0015]]\n",
      "mean_state_value -0.017785913303000705\n",
      "episode 194/600\n",
      "p1 0.7560000000000001 p0 0.060999999999999964\n",
      "trajectorySteps 26\n",
      "[[3 1 2 1 3]\n",
      " [3 0 0 0 1]\n",
      " [4 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0346 -0.1153 -0.1164 -0.0309 -0.0544]\n",
      " [-0.026  -0.0355 -0.0523 -0.0424 -0.0165]\n",
      " [-0.004  -0.023  -0.0012 -0.0188 -0.0049]\n",
      " [-0.0059 -0.0049  0.0628  0.0238 -0.0193]\n",
      " [-0.0006 -0.0013  0.0886 -0.0063 -0.0015]]\n",
      "mean_state_value -0.01762318110760456\n",
      "episode 195/600\n",
      "p1 0.7568000000000001 p0 0.060799999999999965\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 3]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0344 -0.1149 -0.116  -0.0308 -0.0542]\n",
      " [-0.0259 -0.0353 -0.0521 -0.0423 -0.0164]\n",
      " [-0.0041 -0.0229 -0.0011 -0.0187 -0.0048]\n",
      " [-0.0059 -0.0049  0.0637  0.0239 -0.0193]\n",
      " [-0.0006 -0.0013  0.0895 -0.0061 -0.0015]]\n",
      "mean_state_value -0.017463074735405888\n",
      "episode 196/600\n",
      "p1 0.7576000000000002 p0 0.060599999999999966\n",
      "trajectorySteps 21\n",
      "[[1 2 1 1 2]\n",
      " [2 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0343 -0.1145 -0.1156 -0.0307 -0.0541]\n",
      " [-0.0259 -0.0352 -0.052  -0.0421 -0.0164]\n",
      " [-0.0041 -0.0228 -0.0011 -0.0186 -0.0048]\n",
      " [-0.0059 -0.0048  0.0646  0.0239 -0.0192]\n",
      " [-0.0006 -0.0013  0.0904 -0.0059 -0.0015]]\n",
      "mean_state_value -0.017299143881431764\n",
      "episode 197/600\n",
      "p1 0.7584000000000002 p0 0.06039999999999997\n",
      "trajectorySteps 21\n",
      "[[0 0 0 1 1]\n",
      " [1 2 1 1 1]\n",
      " [2 0 0 0 2]\n",
      " [3 0 1 0 1]\n",
      " [1 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0342 -0.1141 -0.1153 -0.0306 -0.0539]\n",
      " [-0.0264 -0.0363 -0.0518 -0.042  -0.0163]\n",
      " [-0.0041 -0.0227 -0.001  -0.0186 -0.0049]\n",
      " [-0.0058 -0.0048  0.0656  0.0239 -0.0191]\n",
      " [-0.0006 -0.0013  0.0913 -0.0057 -0.0015]]\n",
      "mean_state_value -0.01720662896508118\n",
      "episode 198/600\n",
      "p1 0.7592000000000001 p0 0.06019999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 2 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0341 -0.1138 -0.1149 -0.0305 -0.0537]\n",
      " [-0.0263 -0.0362 -0.0516 -0.0418 -0.0163]\n",
      " [-0.0041 -0.0227 -0.0009 -0.0185 -0.0049]\n",
      " [-0.0058 -0.0048  0.0665  0.024  -0.0191]\n",
      " [-0.0006 -0.0012  0.0922 -0.0056 -0.0015]]\n",
      "mean_state_value -0.017043353623332744\n",
      "episode 199/600\n",
      "p1 0.7600000000000001 p0 0.05999999999999996\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.034  -0.1134 -0.1145 -0.0304 -0.0535]\n",
      " [-0.0262 -0.036  -0.0515 -0.0417 -0.0162]\n",
      " [-0.0041 -0.0226 -0.0008 -0.0184 -0.0048]\n",
      " [-0.0058 -0.0048  0.0675  0.024  -0.0191]\n",
      " [-0.0006 -0.0012  0.0931 -0.0054 -0.0015]]\n",
      "mean_state_value -0.016882252877977268\n",
      "episode 200/600\n",
      "p1 0.7608000000000001 p0 0.059799999999999964\n",
      "trajectorySteps 18\n",
      "[[1 1 1 3 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0339 -0.113  -0.1141 -0.0304 -0.0533]\n",
      " [-0.0261 -0.0359 -0.0513 -0.0416 -0.0162]\n",
      " [-0.004  -0.0225 -0.0007 -0.0184 -0.0048]\n",
      " [-0.0058 -0.0048  0.0684  0.0249 -0.019 ]\n",
      " [-0.0006 -0.0012  0.0932 -0.006  -0.0015]]\n",
      "mean_state_value -0.016741950321820857\n",
      "episode 201/600\n",
      "p1 0.7616000000000002 p0 0.059599999999999966\n",
      "trajectorySteps 18\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 2]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0338 -0.1126 -0.1137 -0.0302 -0.0532]\n",
      " [-0.0266 -0.0358 -0.0511 -0.0414 -0.0161]\n",
      " [-0.004  -0.0224 -0.0007 -0.0183 -0.0049]\n",
      " [-0.0058 -0.0048  0.0694  0.0249 -0.0189]\n",
      " [-0.0006 -0.0012  0.0941 -0.0058 -0.0014]]\n",
      "mean_state_value -0.016601155022564183\n",
      "episode 202/600\n",
      "p1 0.7624000000000002 p0 0.05939999999999997\n",
      "trajectorySteps 25\n",
      "[[1 1 2 3 5]\n",
      " [1 0 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0336 -0.1122 -0.1139 -0.0301 -0.0531]\n",
      " [-0.0265 -0.0357 -0.0509 -0.0413 -0.0161]\n",
      " [-0.004  -0.0224 -0.0006 -0.0183 -0.0049]\n",
      " [-0.0057 -0.0047  0.069   0.025  -0.0189]\n",
      " [-0.0006 -0.0012  0.095  -0.0057 -0.0014]]\n",
      "mean_state_value -0.01651406265697039\n",
      "episode 203/600\n",
      "p1 0.7632000000000001 p0 0.05919999999999996\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 1 3]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0335 -0.1119 -0.1135 -0.03   -0.0529]\n",
      " [-0.0265 -0.0356 -0.0508 -0.0411 -0.016 ]\n",
      " [-0.004  -0.0223 -0.0005 -0.0182 -0.0049]\n",
      " [-0.0057 -0.0047  0.0699  0.025  -0.0188]\n",
      " [-0.0006 -0.0012  0.0959 -0.0055 -0.0014]]\n",
      "mean_state_value -0.01635157766462792\n",
      "episode 204/600\n",
      "p1 0.7640000000000001 p0 0.05899999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 2 1]\n",
      " [3 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0334 -0.1115 -0.1131 -0.0299 -0.0527]\n",
      " [-0.0264 -0.0354 -0.0506 -0.041  -0.016 ]\n",
      " [-0.004  -0.0222 -0.0004 -0.0181 -0.0049]\n",
      " [-0.0057 -0.0047  0.0709  0.0251 -0.0187]\n",
      " [-0.0006 -0.0012  0.0968 -0.0053 -0.0014]]\n",
      "mean_state_value -0.016187213795208016\n",
      "episode 205/600\n",
      "p1 0.7648000000000001 p0 0.05879999999999996\n",
      "trajectorySteps 21\n",
      "[[2 1 1 1 0]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0334 -0.1111 -0.1127 -0.0298 -0.0525]\n",
      " [-0.0263 -0.0353 -0.0504 -0.0409 -0.0159]\n",
      " [-0.004  -0.0221 -0.0003 -0.0181 -0.005 ]\n",
      " [-0.0057 -0.0047  0.0719  0.0251 -0.0187]\n",
      " [-0.0006 -0.0012  0.0977 -0.0052 -0.0014]]\n",
      "mean_state_value -0.01603124288175472\n",
      "episode 206/600\n",
      "p1 0.7656000000000002 p0 0.058599999999999965\n",
      "trajectorySteps 19\n",
      "[[4 2 1 1 0]\n",
      " [2 0 0 1 0]\n",
      " [2 2 1 1 0]\n",
      " [0 1 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0333 -0.1107 -0.1123 -0.0297 -0.0523]\n",
      " [-0.0262 -0.0352 -0.0503 -0.0407 -0.0159]\n",
      " [-0.004  -0.0226  0.0006 -0.0186 -0.0049]\n",
      " [-0.0057 -0.0047  0.0715  0.0251 -0.0187]\n",
      " [-0.0006 -0.0012  0.0979 -0.0052 -0.0014]]\n",
      "mean_state_value -0.015967101114100855\n",
      "episode 207/600\n",
      "p1 0.7664000000000002 p0 0.058399999999999966\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0332 -0.1104 -0.1119 -0.0296 -0.0522]\n",
      " [-0.0261 -0.0351 -0.0501 -0.0406 -0.0158]\n",
      " [-0.0041 -0.0226  0.0007 -0.0185 -0.0049]\n",
      " [-0.0056 -0.0047  0.0724  0.0252 -0.0186]\n",
      " [-0.0006 -0.0012  0.0988 -0.005  -0.0014]]\n",
      "mean_state_value -0.015801989885493507\n",
      "episode 208/600\n",
      "p1 0.7672000000000001 p0 0.05819999999999996\n",
      "trajectorySteps 21\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 1 0 1]\n",
      " [1 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0331 -0.11   -0.1116 -0.0295 -0.052 ]\n",
      " [-0.026  -0.035  -0.0499 -0.0405 -0.0158]\n",
      " [-0.004  -0.0225  0.0007 -0.0185 -0.0049]\n",
      " [-0.0057 -0.0046  0.0721  0.0252 -0.0185]\n",
      " [-0.0006 -0.0012  0.0997 -0.0049 -0.0013]]\n",
      "mean_state_value -0.01569386446472398\n",
      "episode 209/600\n",
      "p1 0.7680000000000001 p0 0.05799999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 3]\n",
      " [1 0 0 0 3]\n",
      " [0 0 1 1 2]\n",
      " [0 0 0 0 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.033  -0.1096 -0.1112 -0.0294 -0.0518]\n",
      " [-0.0259 -0.0348 -0.0497 -0.0403 -0.0158]\n",
      " [-0.004  -0.0224  0.0008 -0.0184 -0.0049]\n",
      " [-0.0057 -0.0046  0.0717  0.0261 -0.019 ]\n",
      " [-0.0006 -0.0012  0.0999 -0.0049 -0.0013]]\n",
      "mean_state_value -0.015607159811724763\n",
      "episode 210/600\n",
      "p1 0.7688000000000001 p0 0.05779999999999996\n",
      "trajectorySteps 26\n",
      "[[2 1 1 1 1]\n",
      " [2 0 0 0 2]\n",
      " [3 3 0 0 1]\n",
      " [0 2 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0329 -0.1092 -0.1108 -0.0293 -0.0516]\n",
      " [-0.0258 -0.0347 -0.0496 -0.0402 -0.0158]\n",
      " [-0.0041 -0.0235  0.0009 -0.0183 -0.0049]\n",
      " [-0.0056 -0.0046  0.0719  0.0262 -0.019 ]\n",
      " [-0.0006 -0.0012  0.1007 -0.0047 -0.0013]]\n",
      "mean_state_value -0.015521303754875385\n",
      "episode 211/600\n",
      "p1 0.7696000000000002 p0 0.057599999999999964\n",
      "trajectorySteps 20\n",
      "[[1 3 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0328 -0.1088 -0.1104 -0.0292 -0.0514]\n",
      " [-0.0257 -0.0346 -0.0494 -0.04   -0.0157]\n",
      " [-0.0041 -0.0234  0.001  -0.0183 -0.0049]\n",
      " [-0.0056 -0.0046  0.0729  0.0262 -0.0189]\n",
      " [-0.0006 -0.0011  0.1017 -0.0045 -0.0013]]\n",
      "mean_state_value -0.015352749441337259\n",
      "episode 212/600\n",
      "p1 0.7704000000000002 p0 0.057399999999999965\n",
      "trajectorySteps 21\n",
      "[[1 1 2 3 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0326 -0.1085 -0.11   -0.0291 -0.0513]\n",
      " [-0.0257 -0.0345 -0.0492 -0.0399 -0.0156]\n",
      " [-0.004  -0.0233  0.0011 -0.0182 -0.0049]\n",
      " [-0.0056 -0.0046  0.0738  0.0262 -0.0188]\n",
      " [-0.0006 -0.0011  0.1025 -0.0043 -0.0013]]\n",
      "mean_state_value -0.015182014718056514\n",
      "episode 213/600\n",
      "p1 0.7712000000000001 p0 0.05719999999999996\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0325 -0.1081 -0.1096 -0.029  -0.0511]\n",
      " [-0.0256 -0.0344 -0.0491 -0.0398 -0.0156]\n",
      " [-0.004  -0.0232  0.0011 -0.0181 -0.0049]\n",
      " [-0.0056 -0.0046  0.0735  0.0263 -0.0188]\n",
      " [-0.0006 -0.0011  0.1035 -0.0041 -0.0013]]\n",
      "mean_state_value -0.015068002596362436\n",
      "episode 214/600\n",
      "p1 0.7720000000000001 p0 0.05699999999999996\n",
      "trajectorySteps 23\n",
      "[[2 1 1 1 0]\n",
      " [2 0 0 2 1]\n",
      " [4 2 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0324 -0.1077 -0.1093 -0.0289 -0.0509]\n",
      " [-0.0255 -0.0342 -0.0489 -0.0396 -0.0155]\n",
      " [-0.0041 -0.0232  0.0012 -0.0181 -0.0049]\n",
      " [-0.0056 -0.0046  0.0744  0.0263 -0.0187]\n",
      " [-0.0006 -0.0011  0.1044 -0.004  -0.0013]]\n",
      "mean_state_value -0.014903576050087727\n",
      "episode 215/600\n",
      "p1 0.7728000000000002 p0 0.05679999999999996\n",
      "trajectorySteps 22\n",
      "[[2 2 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0323 -0.1078 -0.1089 -0.0288 -0.0508]\n",
      " [-0.0254 -0.0341 -0.0487 -0.0395 -0.0155]\n",
      " [-0.0041 -0.0231  0.0013 -0.018  -0.0048]\n",
      " [-0.0055 -0.0045  0.0754  0.0272 -0.0186]\n",
      " [-0.0006 -0.0011  0.1046 -0.0045 -0.0014]]\n",
      "mean_state_value -0.014785873483671507\n",
      "episode 216/600\n",
      "p1 0.7736000000000002 p0 0.05659999999999996\n",
      "trajectorySteps 22\n",
      "[[1 2 2 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 1 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0322 -0.1074 -0.1085 -0.0287 -0.0506]\n",
      " [-0.0253 -0.034  -0.0485 -0.0393 -0.0155]\n",
      " [-0.0041 -0.023   0.0014 -0.018  -0.0048]\n",
      " [-0.0055 -0.0045  0.0751  0.0273 -0.0186]\n",
      " [-0.0006 -0.0011  0.1055 -0.0043 -0.0014]]\n",
      "mean_state_value -0.01467365319176988\n",
      "episode 217/600\n",
      "p1 0.7744000000000002 p0 0.056399999999999964\n",
      "trajectorySteps 21\n",
      "[[1 2 2 2 1]\n",
      " [2 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0321 -0.1071 -0.1081 -0.0287 -0.0504]\n",
      " [-0.0253 -0.0339 -0.0484 -0.0392 -0.0155]\n",
      " [-0.0041 -0.0229  0.0015 -0.0179 -0.0048]\n",
      " [-0.0055 -0.0045  0.0747  0.0273 -0.0185]\n",
      " [-0.0006 -0.0011  0.1064 -0.0042 -0.0014]]\n",
      "mean_state_value -0.014562189450557872\n",
      "episode 218/600\n",
      "p1 0.7752000000000001 p0 0.05619999999999996\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 3]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.032  -0.1067 -0.1077 -0.0286 -0.0502]\n",
      " [-0.0252 -0.0338 -0.0482 -0.0391 -0.0154]\n",
      " [-0.0041 -0.0228  0.0016 -0.0178 -0.0048]\n",
      " [-0.0055 -0.0045  0.0757  0.0273 -0.0184]\n",
      " [-0.0006 -0.0011  0.1073 -0.004  -0.0014]]\n",
      "mean_state_value -0.014392522947201933\n",
      "episode 219/600\n",
      "p1 0.7760000000000001 p0 0.05599999999999996\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0319 -0.1063 -0.1073 -0.0285 -0.0501]\n",
      " [-0.0251 -0.0336 -0.048  -0.0389 -0.0153]\n",
      " [-0.0041 -0.0228  0.0016 -0.0178 -0.0048]\n",
      " [-0.0055 -0.0045  0.0767  0.0274 -0.0184]\n",
      " [-0.0006 -0.0011  0.1083 -0.0038 -0.0013]]\n",
      "mean_state_value -0.014224768029716348\n",
      "episode 220/600\n",
      "p1 0.7768000000000002 p0 0.05579999999999996\n",
      "trajectorySteps 23\n",
      "[[1 1 1 1 1]\n",
      " [3 0 0 0 1]\n",
      " [3 1 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0317 -0.1059 -0.107  -0.0283 -0.0499]\n",
      " [-0.025  -0.0335 -0.0479 -0.0388 -0.0153]\n",
      " [-0.0041 -0.0227  0.0017 -0.0177 -0.0048]\n",
      " [-0.0054 -0.0045  0.0777  0.0274 -0.0183]\n",
      " [-0.0006 -0.0011  0.1092 -0.0036 -0.0013]]\n",
      "mean_state_value -0.014050291054893051\n",
      "episode 221/600\n",
      "p1 0.7776000000000002 p0 0.05559999999999996\n",
      "trajectorySteps 31\n",
      "[[1 3 1 2 4]\n",
      " [1 1 0 0 2]\n",
      " [3 0 0 0 1]\n",
      " [2 0 1 0 1]\n",
      " [3 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0316 -0.106  -0.1066 -0.0282 -0.0497]\n",
      " [-0.025  -0.0334 -0.0477 -0.0386 -0.0152]\n",
      " [-0.0041 -0.0226  0.0018 -0.0176 -0.0047]\n",
      " [-0.0054 -0.0044  0.0787  0.0275 -0.0182]\n",
      " [-0.0006 -0.0011  0.1101 -0.0034 -0.0013]]\n",
      "mean_state_value -0.013905734639365373\n",
      "episode 222/600\n",
      "p1 0.7784000000000002 p0 0.05539999999999996\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0315 -0.1057 -0.1062 -0.0281 -0.0496]\n",
      " [-0.0249 -0.0333 -0.0475 -0.0385 -0.0152]\n",
      " [-0.0041 -0.0225  0.0019 -0.0176 -0.0047]\n",
      " [-0.0054 -0.0044  0.0797  0.0275 -0.0182]\n",
      " [-0.0006 -0.0011  0.111  -0.0032 -0.0013]]\n",
      "mean_state_value -0.013738577653258716\n",
      "episode 223/600\n",
      "p1 0.7792000000000001 p0 0.05519999999999996\n",
      "trajectorySteps 27\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 1 2]\n",
      " [4 1 0 0 2]\n",
      " [2 0 1 0 1]\n",
      " [0 0 3 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0314 -0.1053 -0.1058 -0.028  -0.0494]\n",
      " [-0.0248 -0.0332 -0.0473 -0.0384 -0.0151]\n",
      " [-0.0041 -0.0224  0.002  -0.0175 -0.0047]\n",
      " [-0.0054 -0.0044  0.0807  0.0276 -0.0181]\n",
      " [-0.0006 -0.0011  0.112  -0.0029 -0.0013]]\n",
      "mean_state_value -0.01356518356300862\n",
      "episode 224/600\n",
      "p1 0.7800000000000001 p0 0.05499999999999996\n",
      "trajectorySteps 20\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0313 -0.1049 -0.1059 -0.0279 -0.0492]\n",
      " [-0.0247 -0.033  -0.0472 -0.0382 -0.0151]\n",
      " [-0.0041 -0.0224  0.002  -0.0174 -0.0047]\n",
      " [-0.0054 -0.0044  0.0803  0.0276 -0.018 ]\n",
      " [-0.0006 -0.0011  0.1129 -0.0026 -0.0013]]\n",
      "mean_state_value -0.013463507036742034\n",
      "episode 225/600\n",
      "p1 0.7808000000000002 p0 0.05479999999999996\n",
      "trajectorySteps 19\n",
      "[[2 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0312 -0.1045 -0.1055 -0.0278 -0.0491]\n",
      " [-0.0246 -0.0329 -0.047  -0.0381 -0.015 ]\n",
      " [-0.0041 -0.0223  0.0021 -0.0174 -0.0047]\n",
      " [-0.0053 -0.0044  0.08    0.0277 -0.018 ]\n",
      " [-0.0006 -0.001   0.1139 -0.0025 -0.0012]]\n",
      "mean_state_value -0.013347785687319712\n",
      "episode 226/600\n",
      "p1 0.7816000000000002 p0 0.05459999999999996\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0311 -0.1041 -0.1051 -0.0277 -0.0489]\n",
      " [-0.0246 -0.0328 -0.0468 -0.0379 -0.015 ]\n",
      " [-0.0041 -0.0222  0.0022 -0.0173 -0.0047]\n",
      " [-0.0053 -0.0044  0.081   0.0286 -0.0179]\n",
      " [-0.0006 -0.001   0.114  -0.0029 -0.0012]]\n",
      "mean_state_value -0.013200618186732196\n",
      "episode 227/600\n",
      "p1 0.7824000000000002 p0 0.05439999999999996\n",
      "trajectorySteps 16\n",
      "[[0 1 1 2 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 1 1 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.031  -0.1038 -0.1047 -0.0277 -0.0487]\n",
      " [-0.025  -0.0327 -0.0467 -0.0378 -0.0149]\n",
      " [-0.0041 -0.0221  0.0023 -0.0173 -0.0046]\n",
      " [-0.0053 -0.0043  0.082   0.0286 -0.0184]\n",
      " [-0.0006 -0.001   0.115  -0.0028 -0.0012]]\n",
      "mean_state_value -0.013072484883814297\n",
      "episode 228/600\n",
      "p1 0.7832000000000001 p0 0.05419999999999996\n",
      "trajectorySteps 21\n",
      "[[1 1 2 2 1]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0309 -0.1034 -0.1044 -0.0276 -0.0486]\n",
      " [-0.0249 -0.0326 -0.0465 -0.0377 -0.0148]\n",
      " [-0.0041 -0.022   0.0024 -0.0172 -0.0047]\n",
      " [-0.0053 -0.0043  0.083   0.0286 -0.0184]\n",
      " [-0.0006 -0.001   0.1159 -0.0026 -0.0012]]\n",
      "mean_state_value -0.012904735618663135\n",
      "episode 229/600\n",
      "p1 0.7840000000000001 p0 0.05399999999999996\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 1 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0307 -0.103  -0.104  -0.0275 -0.0484]\n",
      " [-0.0248 -0.0324 -0.0463 -0.0375 -0.0148]\n",
      " [-0.0041 -0.0219  0.0024 -0.0171 -0.0046]\n",
      " [-0.0053 -0.0043  0.084   0.0296 -0.0183]\n",
      " [-0.0006 -0.001   0.1161 -0.003  -0.0012]]\n",
      "mean_state_value -0.01275455898949193\n",
      "episode 230/600\n",
      "p1 0.7848000000000002 p0 0.05379999999999996\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0306 -0.1026 -0.1036 -0.0274 -0.0482]\n",
      " [-0.0247 -0.0323 -0.0461 -0.0374 -0.0147]\n",
      " [-0.0041 -0.0224  0.0034 -0.0171 -0.0046]\n",
      " [-0.0058 -0.0043  0.085   0.0296 -0.0182]\n",
      " [-0.0006 -0.001   0.1163 -0.003  -0.0012]]\n",
      "mean_state_value -0.012625667169392514\n",
      "episode 231/600\n",
      "p1 0.7856000000000002 p0 0.05359999999999996\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0305 -0.1022 -0.1032 -0.0273 -0.048 ]\n",
      " [-0.0246 -0.0322 -0.046  -0.0373 -0.0147]\n",
      " [-0.0041 -0.0223  0.0035 -0.017  -0.0046]\n",
      " [-0.0058 -0.0043  0.086   0.0296 -0.0182]\n",
      " [-0.0006 -0.0009  0.1167 -0.0028 -0.0011]]\n",
      "mean_state_value -0.012468977617522123\n",
      "episode 232/600\n",
      "p1 0.7864000000000002 p0 0.05339999999999996\n",
      "trajectorySteps 19\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0304 -0.1018 -0.1028 -0.0272 -0.0478]\n",
      " [-0.0245 -0.0321 -0.0458 -0.0371 -0.0146]\n",
      " [-0.0042 -0.0222  0.0036 -0.0169 -0.0046]\n",
      " [-0.0057 -0.0043  0.087   0.0297 -0.0181]\n",
      " [-0.0006 -0.0009  0.1176 -0.0026 -0.0011]]\n",
      "mean_state_value -0.01230215374428655\n",
      "episode 233/600\n",
      "p1 0.7872000000000001 p0 0.053199999999999956\n",
      "trajectorySteps 21\n",
      "[[1 1 1 3 4]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0303 -0.1015 -0.1024 -0.0271 -0.0477]\n",
      " [-0.0244 -0.032  -0.0456 -0.037  -0.0146]\n",
      " [-0.0041 -0.0221  0.0036 -0.0169 -0.0046]\n",
      " [-0.0057 -0.0042  0.088   0.0297 -0.0181]\n",
      " [-0.0006 -0.0009  0.1186 -0.0024 -0.0011]]\n",
      "mean_state_value -0.012130075385312194\n",
      "episode 234/600\n",
      "p1 0.7880000000000001 p0 0.05299999999999996\n",
      "trajectorySteps 27\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 3]\n",
      " [3 1 0 1 2]\n",
      " [1 0 1 1 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0302 -0.1011 -0.1021 -0.027  -0.0475]\n",
      " [-0.0243 -0.0318 -0.0455 -0.0368 -0.0145]\n",
      " [-0.0041 -0.0221  0.0037 -0.0168 -0.0046]\n",
      " [-0.0057 -0.0042  0.0891  0.0298 -0.018 ]\n",
      " [-0.0006 -0.0009  0.1195 -0.0028 -0.0011]]\n",
      "mean_state_value -0.011977029198947147\n",
      "episode 235/600\n",
      "p1 0.7888000000000002 p0 0.05279999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0301 -0.1007 -0.1017 -0.0269 -0.0473]\n",
      " [-0.0243 -0.0317 -0.0453 -0.0367 -0.0145]\n",
      " [-0.0041 -0.022   0.0038 -0.0167 -0.0046]\n",
      " [-0.0057 -0.0042  0.0901  0.0298 -0.0179]\n",
      " [-0.0006 -0.0008  0.1205 -0.0026 -0.0011]]\n",
      "mean_state_value -0.01180552009638395\n",
      "episode 236/600\n",
      "p1 0.7896000000000002 p0 0.05259999999999996\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0299 -0.1003 -0.1013 -0.0267 -0.0471]\n",
      " [-0.0242 -0.0316 -0.0451 -0.0366 -0.0144]\n",
      " [-0.0041 -0.0219  0.0039 -0.0167 -0.0046]\n",
      " [-0.0057 -0.0042  0.0903  0.0299 -0.0179]\n",
      " [-0.0006 -0.0008  0.1215 -0.0024 -0.001 ]]\n",
      "mean_state_value -0.011663124898038018\n",
      "episode 237/600\n",
      "p1 0.7904000000000002 p0 0.05239999999999996\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0298 -0.0999 -0.1009 -0.0266 -0.047 ]\n",
      " [-0.0241 -0.0315 -0.0449 -0.0364 -0.0144]\n",
      " [-0.0041 -0.0218  0.004  -0.0166 -0.0046]\n",
      " [-0.0056 -0.0042  0.0913  0.0299 -0.0178]\n",
      " [-0.0006 -0.0008  0.1224 -0.0023 -0.001 ]]\n",
      "mean_state_value -0.011491415396944798\n",
      "episode 238/600\n",
      "p1 0.7912000000000001 p0 0.052199999999999955\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0297 -0.0996 -0.1005 -0.0265 -0.0468]\n",
      " [-0.024  -0.0314 -0.0448 -0.0363 -0.0143]\n",
      " [-0.0041 -0.0217  0.0041 -0.0166 -0.0045]\n",
      " [-0.0056 -0.0042  0.091   0.03   -0.0177]\n",
      " [-0.0006 -0.0008  0.1234 -0.0021 -0.001 ]]\n",
      "mean_state_value -0.011371563492299543\n",
      "episode 239/600\n",
      "p1 0.7920000000000001 p0 0.051999999999999956\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0296 -0.0992 -0.1001 -0.0264 -0.0466]\n",
      " [-0.0239 -0.0312 -0.0446 -0.0361 -0.0142]\n",
      " [-0.004  -0.0216  0.0041 -0.0165 -0.0045]\n",
      " [-0.0056 -0.0042  0.0921  0.03   -0.0177]\n",
      " [-0.0006 -0.0008  0.1243 -0.0019 -0.001 ]]\n",
      "mean_state_value -0.011197478473255845\n",
      "episode 240/600\n",
      "p1 0.7928000000000002 p0 0.05179999999999996\n",
      "trajectorySteps 23\n",
      "[[2 2 1 1 3]\n",
      " [2 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0295 -0.0988 -0.0997 -0.0263 -0.0464]\n",
      " [-0.0238 -0.0311 -0.0444 -0.036  -0.0142]\n",
      " [-0.004  -0.0216  0.0042 -0.0164 -0.0045]\n",
      " [-0.0056 -0.0041  0.0923  0.0301 -0.0176]\n",
      " [-0.0006 -0.0008  0.1253 -0.0017 -0.001 ]]\n",
      "mean_state_value -0.011054569402335878\n",
      "episode 241/600\n",
      "p1 0.7936000000000002 p0 0.05159999999999996\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 1 3]\n",
      " [3 0 0 1 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0294 -0.0984 -0.0994 -0.0262 -0.0462]\n",
      " [-0.0237 -0.031  -0.0443 -0.0359 -0.0141]\n",
      " [-0.0041 -0.0215  0.0043 -0.0164 -0.0045]\n",
      " [-0.0055 -0.0041  0.0933  0.0301 -0.0175]\n",
      " [-0.0006 -0.0008  0.1263 -0.0015 -0.0009]]\n",
      "mean_state_value -0.010882115623332286\n",
      "episode 242/600\n",
      "p1 0.7944000000000002 p0 0.05139999999999996\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [2 0 1 0 1]\n",
      " [1 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0293 -0.098  -0.099  -0.0261 -0.0461]\n",
      " [-0.0236 -0.0309 -0.0441 -0.0357 -0.0141]\n",
      " [-0.004  -0.0214  0.0044 -0.0163 -0.0045]\n",
      " [-0.0055 -0.0041  0.0931  0.0301 -0.0174]\n",
      " [-0.0006 -0.0008  0.1272 -0.0014 -0.0009]]\n",
      "mean_state_value -0.010760454952297917\n",
      "episode 243/600\n",
      "p1 0.7952000000000001 p0 0.051199999999999954\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0291 -0.0977 -0.0986 -0.026  -0.0459]\n",
      " [-0.0235 -0.0308 -0.0439 -0.0356 -0.014 ]\n",
      " [-0.004  -0.0213  0.0045 -0.0162 -0.0045]\n",
      " [-0.0055 -0.0041  0.0941  0.0302 -0.0174]\n",
      " [-0.0006 -0.0008  0.1282 -0.0012 -0.0009]]\n",
      "mean_state_value -0.010585734838659664\n",
      "episode 244/600\n",
      "p1 0.7960000000000002 p0 0.050999999999999955\n",
      "trajectorySteps 25\n",
      "[[1 1 1 3 3]\n",
      " [1 0 0 1 2]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.029  -0.0973 -0.0982 -0.0259 -0.0457]\n",
      " [-0.0234 -0.0306 -0.0437 -0.0354 -0.014 ]\n",
      " [-0.0041 -0.0212  0.0045 -0.0162 -0.0044]\n",
      " [-0.0055 -0.0041  0.0938  0.0302 -0.0173]\n",
      " [-0.0006 -0.0008  0.1292 -0.0008 -0.0009]]\n",
      "mean_state_value -0.010459457359393036\n",
      "episode 245/600\n",
      "p1 0.7968000000000002 p0 0.050799999999999956\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 2 0 0 1]\n",
      " [0 1 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0289 -0.0969 -0.0978 -0.0258 -0.0456]\n",
      " [-0.0233 -0.0305 -0.0436 -0.0353 -0.0139]\n",
      " [-0.004  -0.0216  0.0046 -0.0161 -0.0044]\n",
      " [-0.0055 -0.0041  0.0948  0.0303 -0.0172]\n",
      " [-0.0006 -0.0008  0.1301 -0.0006 -0.0009]]\n",
      "mean_state_value -0.010304528444632064\n",
      "episode 246/600\n",
      "p1 0.7976000000000002 p0 0.05059999999999996\n",
      "trajectorySteps 17\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0288 -0.0969 -0.0974 -0.0257 -0.0454]\n",
      " [-0.0232 -0.0304 -0.0434 -0.0352 -0.0139]\n",
      " [-0.0041 -0.0216  0.0047 -0.016  -0.0044]\n",
      " [-0.0054 -0.004   0.0959  0.0303 -0.0172]\n",
      " [-0.0006 -0.0008  0.1311 -0.0004 -0.0008]]\n",
      "mean_state_value -0.01014839108258609\n",
      "episode 247/600\n",
      "p1 0.7984000000000002 p0 0.05039999999999996\n",
      "trajectorySteps 20\n",
      "[[3 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0287 -0.0966 -0.097  -0.0256 -0.0452]\n",
      " [-0.0232 -0.0303 -0.0432 -0.035  -0.0138]\n",
      " [-0.0041 -0.0215  0.0048 -0.016  -0.0044]\n",
      " [-0.0054 -0.004   0.0969  0.0304 -0.0171]\n",
      " [-0.0006 -0.0008  0.1321 -0.0002 -0.0008]]\n",
      "mean_state_value -0.009974610889203267\n",
      "episode 248/600\n",
      "p1 0.7992000000000001 p0 0.05019999999999995\n",
      "trajectorySteps 22\n",
      "[[1 1 1 2 2]\n",
      " [3 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0286 -0.0962 -0.0967 -0.0255 -0.045 ]\n",
      " [-0.0232 -0.0302 -0.0431 -0.0349 -0.0138]\n",
      " [-0.004  -0.0214  0.0049 -0.0159 -0.0044]\n",
      " [-0.0054 -0.004   0.0979  0.0313 -0.017 ]\n",
      " [-0.0006 -0.0007  0.1323 -0.0007 -0.0008]]\n",
      "mean_state_value -0.009822954221987342\n",
      "episode 249/600\n",
      "p1 0.8000000000000002 p0 0.049999999999999954\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0285 -0.0958 -0.0963 -0.0254 -0.0448]\n",
      " [-0.0231 -0.03   -0.0429 -0.0348 -0.0137]\n",
      " [-0.004  -0.0213  0.005  -0.0159 -0.0043]\n",
      " [-0.0054 -0.004   0.099   0.0314 -0.017 ]\n",
      " [-0.0005 -0.0007  0.1332 -0.0005 -0.0008]]\n",
      "mean_state_value -0.009646937603182687\n",
      "episode 250/600\n",
      "p1 0.8008000000000002 p0 0.049799999999999955\n",
      "trajectorySteps 20\n",
      "[[1 2 2 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0284 -0.0954 -0.0959 -0.0253 -0.0447]\n",
      " [-0.023  -0.0299 -0.0427 -0.0346 -0.0136]\n",
      " [-0.004  -0.0212  0.005  -0.0158 -0.0043]\n",
      " [-0.0054 -0.004   0.1     0.0314 -0.0169]\n",
      " [-0.0005 -0.0007  0.1342 -0.0003 -0.0008]]\n",
      "mean_state_value -0.009473758832628566\n",
      "episode 251/600\n",
      "p1 0.8016000000000002 p0 0.04959999999999996\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 4]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.830e-02 -9.510e-02 -9.550e-02 -2.520e-02 -4.450e-02]\n",
      " [-2.290e-02 -2.980e-02 -4.250e-02 -3.450e-02 -1.360e-02]\n",
      " [-4.000e-03 -2.110e-02  5.100e-03 -1.570e-02 -4.300e-03]\n",
      " [-5.300e-03 -4.000e-03  1.011e-01  3.140e-02 -1.690e-02]\n",
      " [-5.000e-04 -7.000e-04  1.352e-01 -1.000e-04 -8.000e-04]]\n",
      "mean_state_value -0.009301219887133729\n",
      "episode 252/600\n",
      "p1 0.8024000000000002 p0 0.04939999999999996\n",
      "trajectorySteps 21\n",
      "[[2 1 1 1 1]\n",
      " [3 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.820e-02 -9.470e-02 -9.510e-02 -2.510e-02 -4.430e-02]\n",
      " [-2.280e-02 -2.970e-02 -4.240e-02 -3.430e-02 -1.350e-02]\n",
      " [-4.000e-03 -2.110e-02  5.200e-03 -1.570e-02 -4.300e-03]\n",
      " [-5.300e-03 -3.900e-03  1.021e-01  3.150e-02 -1.690e-02]\n",
      " [-5.000e-04 -7.000e-04  1.362e-01  1.000e-04 -8.000e-04]]\n",
      "mean_state_value -0.009124313769572921\n",
      "episode 253/600\n",
      "p1 0.8032000000000001 p0 0.04919999999999995\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0281 -0.0943 -0.0947 -0.025  -0.0441]\n",
      " [-0.0227 -0.0296 -0.0422 -0.0342 -0.0135]\n",
      " [-0.004  -0.021   0.0053 -0.0156 -0.0043]\n",
      " [-0.0053 -0.0039  0.1031  0.0315 -0.0168]\n",
      " [-0.0005 -0.0006  0.1367  0.0004 -0.0008]]\n",
      "mean_state_value -0.008963833136573751\n",
      "episode 254/600\n",
      "p1 0.8040000000000002 p0 0.04899999999999995\n",
      "trajectorySteps 22\n",
      "[[1 1 1 2 3]\n",
      " [1 0 0 1 3]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.790e-02 -9.390e-02 -9.430e-02 -2.490e-02 -4.400e-02]\n",
      " [-2.260e-02 -2.940e-02 -4.200e-02 -3.410e-02 -1.350e-02]\n",
      " [-4.000e-03 -2.090e-02  5.400e-03 -1.550e-02 -4.300e-03]\n",
      " [-5.300e-03 -3.900e-03  1.042e-01  3.250e-02 -1.670e-02]\n",
      " [-5.000e-04 -6.000e-04  1.369e-01 -1.000e-04 -7.000e-04]]\n",
      "mean_state_value -0.00880936116392769\n",
      "episode 255/600\n",
      "p1 0.8048000000000002 p0 0.048799999999999955\n",
      "trajectorySteps 22\n",
      "[[1 1 2 2 3]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0278 -0.0935 -0.094  -0.0248 -0.0438]\n",
      " [-0.0225 -0.0293 -0.0419 -0.0339 -0.0134]\n",
      " [-0.004  -0.0208  0.0054 -0.0155 -0.0043]\n",
      " [-0.0052 -0.0039  0.1052  0.0325 -0.0166]\n",
      " [-0.0005 -0.0005  0.1379  0.0002 -0.0007]]\n",
      "mean_state_value -0.008635191585972162\n",
      "episode 256/600\n",
      "p1 0.8056000000000002 p0 0.048599999999999956\n",
      "trajectorySteps 19\n",
      "[[1 1 2 1 2]\n",
      " [1 0 0 1 2]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0277 -0.0932 -0.0936 -0.0247 -0.0437]\n",
      " [-0.0224 -0.0292 -0.0417 -0.0338 -0.0133]\n",
      " [-0.004  -0.0207  0.0055 -0.0154 -0.0043]\n",
      " [-0.0052 -0.0039  0.1055  0.0326 -0.0166]\n",
      " [-0.0005 -0.0005  0.1388  0.0004 -0.0007]]\n",
      "mean_state_value -0.008491390823120972\n",
      "episode 257/600\n",
      "p1 0.8064000000000002 p0 0.04839999999999996\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0276 -0.0928 -0.0932 -0.0246 -0.0435]\n",
      " [-0.0223 -0.0291 -0.0415 -0.0336 -0.0133]\n",
      " [-0.0039 -0.0206  0.0056 -0.0154 -0.0043]\n",
      " [-0.0052 -0.0039  0.1065  0.0326 -0.0165]\n",
      " [-0.0005 -0.0004  0.1394  0.0006 -0.0007]]\n",
      "mean_state_value -0.00832680049377309\n",
      "episode 258/600\n",
      "p1 0.8072000000000001 p0 0.04819999999999995\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0275 -0.0924 -0.0928 -0.0245 -0.0433]\n",
      " [-0.0222 -0.029  -0.0413 -0.0335 -0.0132]\n",
      " [-0.0039 -0.0205  0.0057 -0.0153 -0.0043]\n",
      " [-0.0052 -0.0038  0.1076  0.0327 -0.0164]\n",
      " [-0.0005 -0.0004  0.1404  0.0003 -0.0006]]\n",
      "mean_state_value -0.008167621352165137\n",
      "episode 259/600\n",
      "p1 0.8080000000000002 p0 0.04799999999999995\n",
      "trajectorySteps 22\n",
      "[[2 1 1 1 2]\n",
      " [3 0 0 0 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0274 -0.092  -0.0924 -0.0244 -0.0431]\n",
      " [-0.0221 -0.0288 -0.0412 -0.0334 -0.0132]\n",
      " [-0.0039 -0.0205  0.0058 -0.0152 -0.0043]\n",
      " [-0.0052 -0.0038  0.1086  0.0327 -0.0164]\n",
      " [-0.0005 -0.0004  0.1413  0.0005 -0.0006]]\n",
      "mean_state_value -0.00798958517899677\n",
      "episode 260/600\n",
      "p1 0.8088000000000002 p0 0.047799999999999954\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0273 -0.0916 -0.092  -0.0243 -0.0429]\n",
      " [-0.022  -0.0287 -0.041  -0.0332 -0.0131]\n",
      " [-0.0039 -0.0204  0.0059 -0.0152 -0.0042]\n",
      " [-0.0051 -0.0038  0.1097  0.0328 -0.0163]\n",
      " [-0.0005 -0.0004  0.1423  0.0007 -0.0006]]\n",
      "mean_state_value -0.007811381616469678\n",
      "episode 261/600\n",
      "p1 0.8096000000000002 p0 0.047599999999999955\n",
      "trajectorySteps 27\n",
      "[[2 1 1 1 1]\n",
      " [2 0 0 2 3]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 1 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0271 -0.0912 -0.0917 -0.0242 -0.0428]\n",
      " [-0.022  -0.0286 -0.0408 -0.0331 -0.0131]\n",
      " [-0.0039 -0.0203  0.0059 -0.0151 -0.0042]\n",
      " [-0.0051 -0.0038  0.1108  0.0328 -0.0162]\n",
      " [-0.0005 -0.0002  0.1429  0.001  -0.0006]]\n",
      "mean_state_value -0.007646655854215959\n",
      "episode 262/600\n",
      "p1 0.8104000000000002 p0 0.047399999999999956\n",
      "trajectorySteps 17\n",
      "[[1 2 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.027  -0.0913 -0.0913 -0.0241 -0.0426]\n",
      " [-0.0219 -0.0285 -0.0407 -0.0329 -0.013 ]\n",
      " [-0.0039 -0.0202  0.006  -0.015  -0.0042]\n",
      " [-0.0051 -0.0038  0.1118  0.0329 -0.0162]\n",
      " [-0.0005 -0.0002  0.1439  0.0012 -0.0005]]\n",
      "mean_state_value -0.007484240577556042\n",
      "episode 263/600\n",
      "p1 0.8112000000000001 p0 0.04719999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0269 -0.0909 -0.0909 -0.024  -0.0424]\n",
      " [-0.0218 -0.0284 -0.0405 -0.0328 -0.013 ]\n",
      " [-0.0039 -0.0201  0.0061 -0.015  -0.0042]\n",
      " [-0.0051 -0.0038  0.1129  0.0329 -0.0161]\n",
      " [-0.0005 -0.0002  0.1449  0.0014 -0.0005]]\n",
      "mean_state_value -0.007307099017186914\n",
      "episode 264/600\n",
      "p1 0.8120000000000002 p0 0.04699999999999995\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0268 -0.0905 -0.0905 -0.0239 -0.0422]\n",
      " [-0.0217 -0.0282 -0.0403 -0.0327 -0.0129]\n",
      " [-0.004  -0.02    0.0062 -0.0149 -0.0042]\n",
      " [-0.0051 -0.0038  0.1139  0.033  -0.016 ]\n",
      " [-0.0005 -0.0002  0.1459  0.0016 -0.0005]]\n",
      "mean_state_value -0.007129770782111976\n",
      "episode 265/600\n",
      "p1 0.8128000000000002 p0 0.04679999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0267 -0.0901 -0.0901 -0.0238 -0.042 ]\n",
      " [-0.0216 -0.0281 -0.0401 -0.0325 -0.0129]\n",
      " [-0.0039 -0.0199  0.0063 -0.0148 -0.0042]\n",
      " [-0.005  -0.0037  0.115   0.033  -0.0159]\n",
      " [-0.0005 -0.0002  0.1469  0.0018 -0.0005]]\n",
      "mean_state_value -0.006950541824964277\n",
      "episode 266/600\n",
      "p1 0.8136000000000002 p0 0.046599999999999954\n",
      "trajectorySteps 17\n",
      "[[1 1 1 3 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0266 -0.0897 -0.0897 -0.0237 -0.0419]\n",
      " [-0.0215 -0.028  -0.04   -0.0324 -0.0128]\n",
      " [-0.0039 -0.0199  0.0063 -0.0148 -0.0041]\n",
      " [-0.005  -0.0037  0.1161  0.033  -0.0159]\n",
      " [-0.0005 -0.0002  0.1479  0.002  -0.0004]]\n",
      "mean_state_value -0.006771142539347036\n",
      "episode 267/600\n",
      "p1 0.8144000000000002 p0 0.046399999999999955\n",
      "trajectorySteps 19\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0265 -0.0893 -0.0893 -0.0236 -0.0417]\n",
      " [-0.0214 -0.0279 -0.0398 -0.0322 -0.0127]\n",
      " [-0.0039 -0.0198  0.0064 -0.0147 -0.0041]\n",
      " [-0.005  -0.0037  0.1171  0.0331 -0.0158]\n",
      " [-0.0005 -0.0002  0.1489  0.0023 -0.0004]]\n",
      "mean_state_value -0.006590970486103607\n",
      "episode 268/600\n",
      "p1 0.8152000000000001 p0 0.04619999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.630e-02 -8.900e-02 -8.900e-02 -2.350e-02 -4.150e-02]\n",
      " [-2.130e-02 -2.780e-02 -3.960e-02 -3.210e-02 -1.270e-02]\n",
      " [-3.900e-03 -1.970e-02  6.500e-03 -1.470e-02 -4.100e-03]\n",
      " [-5.000e-03 -3.700e-03  1.182e-01  3.310e-02 -1.570e-02]\n",
      " [-5.000e-04 -1.000e-04  1.499e-01  2.500e-03 -4.000e-04]]\n",
      "mean_state_value -0.006411229696054867\n",
      "episode 269/600\n",
      "p1 0.8160000000000002 p0 0.04599999999999995\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.620e-02 -8.860e-02 -8.860e-02 -2.340e-02 -4.130e-02]\n",
      " [-2.120e-02 -2.760e-02 -3.950e-02 -3.200e-02 -1.260e-02]\n",
      " [-3.900e-03 -1.960e-02  6.600e-03 -1.460e-02 -4.100e-03]\n",
      " [-4.900e-03 -3.700e-03  1.193e-01  3.320e-02 -1.570e-02]\n",
      " [-5.000e-04 -1.000e-04  1.509e-01  2.700e-03 -4.000e-04]]\n",
      "mean_state_value -0.006231318301384585\n",
      "episode 270/600\n",
      "p1 0.8168000000000002 p0 0.04579999999999995\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.610e-02 -8.820e-02 -8.820e-02 -2.330e-02 -4.110e-02]\n",
      " [-2.110e-02 -2.750e-02 -3.930e-02 -3.180e-02 -1.260e-02]\n",
      " [-3.900e-03 -1.950e-02  6.700e-03 -1.450e-02 -4.100e-03]\n",
      " [-4.900e-03 -3.700e-03  1.203e-01  3.320e-02 -1.560e-02]\n",
      " [-5.000e-04 -1.000e-04  1.519e-01  2.900e-03 -3.000e-04]]\n",
      "mean_state_value -0.006051236209497698\n",
      "episode 271/600\n",
      "p1 0.8176000000000002 p0 0.04559999999999995\n",
      "trajectorySteps 20\n",
      "[[2 2 2 1 1]\n",
      " [2 0 0 0 2]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.600e-02 -8.780e-02 -8.780e-02 -2.320e-02 -4.100e-02]\n",
      " [-2.110e-02 -2.740e-02 -3.910e-02 -3.170e-02 -1.250e-02]\n",
      " [-3.800e-03 -1.940e-02  6.800e-03 -1.450e-02 -4.000e-03]\n",
      " [-4.900e-03 -3.600e-03  1.201e-01  3.420e-02 -1.600e-02]\n",
      " [-5.000e-04 -1.000e-04  1.521e-01  3.000e-03 -3.000e-04]]\n",
      "mean_state_value -0.0059423035114768786\n",
      "episode 272/600\n",
      "p1 0.8184000000000002 p0 0.045399999999999954\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.590e-02 -8.740e-02 -8.740e-02 -2.310e-02 -4.080e-02]\n",
      " [-2.100e-02 -2.730e-02 -3.890e-02 -3.160e-02 -1.250e-02]\n",
      " [-3.800e-03 -1.930e-02  6.800e-03 -1.440e-02 -4.000e-03]\n",
      " [-4.900e-03 -3.600e-03  1.212e-01  3.430e-02 -1.590e-02]\n",
      " [-5.000e-04 -1.000e-04  1.532e-01  3.200e-03 -3.000e-04]]\n",
      "mean_state_value -0.005761781431482037\n",
      "episode 273/600\n",
      "p1 0.8192000000000002 p0 0.04519999999999995\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.580e-02 -8.700e-02 -8.700e-02 -2.300e-02 -4.060e-02]\n",
      " [-2.090e-02 -2.720e-02 -3.880e-02 -3.140e-02 -1.240e-02]\n",
      " [-3.800e-03 -1.930e-02  6.900e-03 -1.430e-02 -4.000e-03]\n",
      " [-4.900e-03 -3.600e-03  1.223e-01  3.430e-02 -1.580e-02]\n",
      " [-5.000e-04 -1.000e-04  1.542e-01  3.500e-03 -3.000e-04]]\n",
      "mean_state_value -0.0055810387399062135\n",
      "episode 274/600\n",
      "p1 0.8200000000000002 p0 0.04499999999999995\n",
      "trajectorySteps 21\n",
      "[[4 1 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.580e-02 -8.660e-02 -8.670e-02 -2.290e-02 -4.040e-02]\n",
      " [-2.080e-02 -2.700e-02 -3.860e-02 -3.130e-02 -1.240e-02]\n",
      " [-3.800e-03 -1.920e-02  7.000e-03 -1.430e-02 -4.000e-03]\n",
      " [-4.800e-03 -3.600e-03  1.221e-01  3.440e-02 -1.580e-02]\n",
      " [-5.000e-04 -1.000e-04  1.552e-01  3.700e-03 -3.000e-04]]\n",
      "mean_state_value -0.005457324893293539\n",
      "episode 275/600\n",
      "p1 0.8208000000000002 p0 0.04479999999999995\n",
      "trajectorySteps 29\n",
      "[[2 2 1 2 0]\n",
      " [3 0 0 1 1]\n",
      " [7 2 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.570e-02 -8.630e-02 -8.630e-02 -2.280e-02 -4.020e-02]\n",
      " [-2.070e-02 -2.690e-02 -3.840e-02 -3.110e-02 -1.230e-02]\n",
      " [-3.900e-03 -1.910e-02  7.100e-03 -1.420e-02 -4.000e-03]\n",
      " [-4.800e-03 -3.600e-03  1.232e-01  3.440e-02 -1.570e-02]\n",
      " [-5.000e-04 -1.000e-04  1.562e-01  3.900e-03 -3.000e-04]]\n",
      "mean_state_value -0.005280602294137178\n",
      "episode 276/600\n",
      "p1 0.8216000000000002 p0 0.04459999999999995\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.560e-02 -8.590e-02 -8.590e-02 -2.270e-02 -4.010e-02]\n",
      " [-2.060e-02 -2.680e-02 -3.830e-02 -3.100e-02 -1.230e-02]\n",
      " [-3.800e-03 -1.900e-02  7.200e-03 -1.410e-02 -4.000e-03]\n",
      " [-4.800e-03 -3.600e-03  1.243e-01  3.450e-02 -1.560e-02]\n",
      " [-5.000e-04 -1.000e-04  1.572e-01  4.100e-03 -3.000e-04]]\n",
      "mean_state_value -0.00509897211666404\n",
      "episode 277/600\n",
      "p1 0.8224000000000002 p0 0.04439999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -8.550e-02 -8.550e-02 -2.260e-02 -3.990e-02]\n",
      " [-2.050e-02 -2.670e-02 -3.810e-02 -3.090e-02 -1.220e-02]\n",
      " [-3.800e-03 -1.890e-02  7.200e-03 -1.410e-02 -3.900e-03]\n",
      " [-4.800e-03 -3.500e-03  1.254e-01  3.450e-02 -1.550e-02]\n",
      " [-5.000e-04 -1.000e-04  1.583e-01  4.400e-03 -2.000e-04]]\n",
      "mean_state_value -0.0049175325736032964\n",
      "episode 278/600\n",
      "p1 0.8232000000000002 p0 0.04419999999999995\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -8.510e-02 -8.510e-02 -2.250e-02 -3.970e-02]\n",
      " [-2.040e-02 -2.660e-02 -3.790e-02 -3.070e-02 -1.210e-02]\n",
      " [-3.800e-03 -1.880e-02  7.300e-03 -1.400e-02 -3.900e-03]\n",
      " [-4.800e-03 -3.500e-03  1.264e-01  3.450e-02 -1.550e-02]\n",
      " [-5.000e-04 -1.000e-04  1.593e-01  4.600e-03 -2.000e-04]]\n",
      "mean_state_value -0.004735921718780334\n",
      "episode 279/600\n",
      "p1 0.8240000000000002 p0 0.04399999999999995\n",
      "trajectorySteps 19\n",
      "[[1 1 1 3 2]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -8.470e-02 -8.470e-02 -2.240e-02 -3.950e-02]\n",
      " [-2.030e-02 -2.640e-02 -3.770e-02 -3.060e-02 -1.210e-02]\n",
      " [-3.800e-03 -1.880e-02  7.400e-03 -1.400e-02 -3.900e-03]\n",
      " [-4.700e-03 -3.500e-03  1.275e-01  3.460e-02 -1.540e-02]\n",
      " [-5.000e-04 -1.000e-04  1.603e-01  4.800e-03 -2.000e-04]]\n",
      "mean_state_value -0.00455413945967714\n",
      "episode 280/600\n",
      "p1 0.8248000000000002 p0 0.04379999999999995\n",
      "trajectorySteps 21\n",
      "[[2 2 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0847 -0.0844 -0.0223 -0.0393]\n",
      " [-0.0202 -0.0263 -0.0376 -0.0304 -0.012 ]\n",
      " [-0.0038 -0.0187  0.0075 -0.0139 -0.0039]\n",
      " [-0.0047 -0.0035  0.1286  0.0346 -0.0153]\n",
      " [-0.0005 -0.      0.1613  0.0051 -0.0002]]\n",
      "mean_state_value -0.004388760078885295\n",
      "episode 281/600\n",
      "p1 0.8256000000000002 p0 0.04359999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.500e-02 -8.430e-02 -8.400e-02 -2.220e-02 -3.920e-02]\n",
      " [-2.020e-02 -2.620e-02 -3.740e-02 -3.030e-02 -1.200e-02]\n",
      " [-3.800e-03 -1.860e-02  7.600e-03 -1.380e-02 -3.900e-03]\n",
      " [-4.700e-03 -3.500e-03  1.297e-01  3.470e-02 -1.530e-02]\n",
      " [-5.000e-04 -0.000e+00  1.624e-01  5.300e-03 -1.000e-04]]\n",
      "mean_state_value -0.004206559053384704\n",
      "episode 282/600\n",
      "p1 0.8264000000000002 p0 0.04339999999999995\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.490e-02 -8.390e-02 -8.360e-02 -2.210e-02 -3.900e-02]\n",
      " [-2.010e-02 -2.610e-02 -3.720e-02 -3.020e-02 -1.190e-02]\n",
      " [-3.800e-03 -1.850e-02  7.700e-03 -1.380e-02 -3.900e-03]\n",
      " [-4.700e-03 -3.500e-03  1.308e-01  3.470e-02 -1.520e-02]\n",
      " [-5.000e-04 -0.000e+00  1.634e-01  5.700e-03 -1.000e-04]]\n",
      "mean_state_value -0.004020517552471121\n",
      "episode 283/600\n",
      "p1 0.8272000000000002 p0 0.04319999999999995\n",
      "trajectorySteps 20\n",
      "[[2 2 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.480e-02 -8.360e-02 -8.320e-02 -2.200e-02 -3.880e-02]\n",
      " [-2.000e-02 -2.600e-02 -3.710e-02 -3.000e-02 -1.190e-02]\n",
      " [-3.800e-03 -1.840e-02  7.700e-03 -1.370e-02 -3.800e-03]\n",
      " [-4.600e-03 -3.500e-03  1.319e-01  3.480e-02 -1.510e-02]\n",
      " [-5.000e-04 -0.000e+00  1.644e-01  5.900e-03 -1.000e-04]]\n",
      "mean_state_value -0.0038399489958407906\n",
      "episode 284/600\n",
      "p1 0.8280000000000002 p0 0.04299999999999995\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 4]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.470e-02 -8.320e-02 -8.280e-02 -2.190e-02 -3.860e-02]\n",
      " [-1.990e-02 -2.580e-02 -3.690e-02 -2.990e-02 -1.180e-02]\n",
      " [-3.800e-03 -1.830e-02  7.800e-03 -1.360e-02 -3.800e-03]\n",
      " [-4.600e-03 -3.400e-03  1.317e-01  3.480e-02 -1.500e-02]\n",
      " [-5.000e-04 -0.000e+00  1.654e-01  6.100e-03 -1.000e-04]]\n",
      "mean_state_value -0.0037103877840878453\n",
      "episode 285/600\n",
      "p1 0.8288000000000002 p0 0.04279999999999995\n",
      "trajectorySteps 42\n",
      "[[6 2 2 2 5]\n",
      " [3 0 0 0 2]\n",
      " [3 0 0 0 2]\n",
      " [1 0 1 0 3]\n",
      " [3 1 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.460e-02 -8.280e-02 -8.240e-02 -2.180e-02 -3.850e-02]\n",
      " [-1.980e-02 -2.570e-02 -3.670e-02 -2.970e-02 -1.180e-02]\n",
      " [-3.800e-03 -1.820e-02  7.900e-03 -1.360e-02 -3.800e-03]\n",
      " [-4.600e-03 -3.400e-03  1.320e-01  3.490e-02 -1.500e-02]\n",
      " [-6.000e-04 -0.000e+00  1.661e-01  6.500e-03 -1.000e-04]]\n",
      "mean_state_value -0.0035788881240301908\n",
      "episode 286/600\n",
      "p1 0.8296000000000002 p0 0.04259999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0824 -0.082  -0.0217 -0.0383]\n",
      " [-0.0197 -0.0256 -0.0365 -0.0296 -0.0117]\n",
      " [-0.0038 -0.0182  0.008  -0.0135 -0.0038]\n",
      " [-0.0046 -0.0034  0.1331  0.0349 -0.0149]\n",
      " [-0.0006  0.      0.1671  0.0068 -0.    ]]\n",
      "mean_state_value -0.003397261451574608\n",
      "episode 287/600\n",
      "p1 0.8304000000000002 p0 0.04239999999999995\n",
      "trajectorySteps 20\n",
      "[[3 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.440e-02 -8.200e-02 -8.170e-02 -2.160e-02 -3.810e-02]\n",
      " [-1.960e-02 -2.550e-02 -3.640e-02 -2.950e-02 -1.160e-02]\n",
      " [-3.800e-03 -1.810e-02  8.100e-03 -1.340e-02 -3.800e-03]\n",
      " [-4.600e-03 -3.400e-03  1.342e-01  3.500e-02 -1.480e-02]\n",
      " [-6.000e-04  0.000e+00  1.681e-01  7.000e-03 -1.000e-04]]\n",
      "mean_state_value -0.0032155571217556207\n",
      "episode 288/600\n",
      "p1 0.8312000000000002 p0 0.042199999999999946\n",
      "trajectorySteps 19\n",
      "[[2 2 1 3 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0816 -0.0813 -0.0215 -0.038 ]\n",
      " [-0.0195 -0.0254 -0.0362 -0.0293 -0.0116]\n",
      " [-0.0037 -0.018   0.0081 -0.0134 -0.0037]\n",
      " [-0.0045 -0.0034  0.1353  0.035  -0.0147]\n",
      " [-0.0005  0.      0.1692  0.0073 -0.    ]]\n",
      "mean_state_value -0.0030320170593820224\n",
      "episode 289/600\n",
      "p1 0.8320000000000002 p0 0.04199999999999995\n",
      "trajectorySteps 36\n",
      "[[2 2 2 2 2]\n",
      " [2 0 2 2 3]\n",
      " [3 0 0 0 2]\n",
      " [1 0 1 0 2]\n",
      " [1 1 2 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0813 -0.0809 -0.0214 -0.0378]\n",
      " [-0.0194 -0.0252 -0.0364 -0.0296 -0.0115]\n",
      " [-0.0037 -0.0179  0.0082 -0.0133 -0.0037]\n",
      " [-0.0045 -0.0034  0.1352  0.0351 -0.0147]\n",
      " [-0.0005  0.      0.1698  0.0077  0.    ]]\n",
      "mean_state_value -0.002939102660811097\n",
      "episode 290/600\n",
      "p1 0.8328000000000002 p0 0.04179999999999995\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 3]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0809 -0.0805 -0.0213 -0.0376]\n",
      " [-0.0193 -0.0251 -0.0363 -0.0294 -0.0115]\n",
      " [-0.0037 -0.0178  0.0083 -0.0133 -0.0037]\n",
      " [-0.0045 -0.0033  0.1363  0.0351 -0.0146]\n",
      " [-0.0005  0.      0.1708  0.0079  0.    ]]\n",
      "mean_state_value -0.0027549632052739207\n",
      "episode 291/600\n",
      "p1 0.8336000000000002 p0 0.04159999999999995\n",
      "trajectorySteps 19\n",
      "[[1 2 3 2 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.390e-02 -8.050e-02 -8.010e-02 -2.120e-02 -3.740e-02]\n",
      " [-1.920e-02 -2.500e-02 -3.610e-02 -2.930e-02 -1.140e-02]\n",
      " [-3.700e-03 -1.770e-02  8.400e-03 -1.320e-02 -3.700e-03]\n",
      " [-4.500e-03 -3.300e-03  1.374e-01  3.520e-02 -1.450e-02]\n",
      " [-5.000e-04  0.000e+00  1.719e-01  8.100e-03  1.000e-04]]\n",
      "mean_state_value -0.0025706554926141615\n",
      "episode 292/600\n",
      "p1 0.8344000000000003 p0 0.04139999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.380e-02 -8.010e-02 -7.970e-02 -2.110e-02 -3.720e-02]\n",
      " [-1.910e-02 -2.490e-02 -3.590e-02 -2.920e-02 -1.140e-02]\n",
      " [-3.700e-03 -1.760e-02  8.500e-03 -1.310e-02 -3.700e-03]\n",
      " [-4.500e-03 -3.300e-03  1.385e-01  3.520e-02 -1.450e-02]\n",
      " [-5.000e-04  0.000e+00  1.729e-01  8.400e-03  1.000e-04]]\n",
      "mean_state_value -0.002386175581103235\n",
      "episode 293/600\n",
      "p1 0.8352000000000002 p0 0.041199999999999945\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 3]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.370e-02 -7.970e-02 -7.940e-02 -2.100e-02 -3.710e-02]\n",
      " [-1.900e-02 -2.480e-02 -3.570e-02 -2.900e-02 -1.130e-02]\n",
      " [-3.700e-03 -1.760e-02  8.600e-03 -1.310e-02 -3.700e-03]\n",
      " [-4.400e-03 -3.300e-03  1.388e-01  3.530e-02 -1.440e-02]\n",
      " [-5.000e-04  1.000e-04  1.740e-01  8.600e-03  1.000e-04]]\n",
      "mean_state_value -0.0022345365763818516\n",
      "episode 294/600\n",
      "p1 0.8360000000000002 p0 0.040999999999999946\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.360e-02 -7.930e-02 -7.900e-02 -2.090e-02 -3.690e-02]\n",
      " [-1.900e-02 -2.460e-02 -3.560e-02 -2.890e-02 -1.130e-02]\n",
      " [-3.600e-03 -1.750e-02  8.600e-03 -1.300e-02 -3.600e-03]\n",
      " [-4.400e-03 -3.300e-03  1.399e-01  3.530e-02 -1.440e-02]\n",
      " [-5.000e-04  1.000e-04  1.750e-01  8.900e-03  2.000e-04]]\n",
      "mean_state_value -0.002051356582577867\n",
      "episode 295/600\n",
      "p1 0.8368000000000002 p0 0.04079999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.350e-02 -7.890e-02 -7.860e-02 -2.080e-02 -3.670e-02]\n",
      " [-1.890e-02 -2.450e-02 -3.540e-02 -2.870e-02 -1.120e-02]\n",
      " [-3.600e-03 -1.740e-02  8.700e-03 -1.290e-02 -3.600e-03]\n",
      " [-4.400e-03 -3.300e-03  1.410e-01  3.630e-02 -1.430e-02]\n",
      " [-5.000e-04  1.000e-04  1.753e-01  8.600e-03  2.000e-04]]\n",
      "mean_state_value -0.0018828349369955971\n",
      "episode 296/600\n",
      "p1 0.8376000000000002 p0 0.04059999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.340e-02 -7.850e-02 -7.820e-02 -2.070e-02 -3.650e-02]\n",
      " [-1.880e-02 -2.440e-02 -3.520e-02 -2.860e-02 -1.120e-02]\n",
      " [-3.600e-03 -1.730e-02  8.800e-03 -1.290e-02 -3.600e-03]\n",
      " [-4.400e-03 -3.200e-03  1.421e-01  3.640e-02 -1.420e-02]\n",
      " [-5.000e-04  1.000e-04  1.763e-01  8.800e-03  2.000e-04]]\n",
      "mean_state_value -0.0016976460782485508\n",
      "episode 297/600\n",
      "p1 0.8384000000000003 p0 0.04039999999999995\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.330e-02 -7.820e-02 -7.780e-02 -2.060e-02 -3.640e-02]\n",
      " [-1.870e-02 -2.430e-02 -3.500e-02 -2.850e-02 -1.110e-02]\n",
      " [-3.600e-03 -1.720e-02  8.900e-03 -1.280e-02 -3.600e-03]\n",
      " [-4.300e-03 -3.200e-03  1.432e-01  3.640e-02 -1.410e-02]\n",
      " [-5.000e-04  1.000e-04  1.774e-01  9.100e-03  2.000e-04]]\n",
      "mean_state_value -0.0015130001328275725\n",
      "episode 298/600\n",
      "p1 0.8392000000000002 p0 0.040199999999999944\n",
      "trajectorySteps 20\n",
      "[[1 1 1 3 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.310e-02 -7.780e-02 -7.740e-02 -2.050e-02 -3.620e-02]\n",
      " [-1.860e-02 -2.420e-02 -3.490e-02 -2.830e-02 -1.100e-02]\n",
      " [-3.600e-03 -1.710e-02  9.000e-03 -1.280e-02 -3.600e-03]\n",
      " [-4.300e-03 -3.200e-03  1.444e-01  3.650e-02 -1.410e-02]\n",
      " [-5.000e-04  1.000e-04  1.784e-01  9.300e-03  3.000e-04]]\n",
      "mean_state_value -0.001328926878583897\n",
      "episode 299/600\n",
      "p1 0.8400000000000002 p0 0.039999999999999945\n",
      "trajectorySteps 17\n",
      "[[1 1 2 1 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.300e-02 -7.740e-02 -7.700e-02 -2.040e-02 -3.600e-02]\n",
      " [-1.850e-02 -2.400e-02 -3.470e-02 -2.820e-02 -1.100e-02]\n",
      " [-3.600e-03 -1.700e-02  9.000e-03 -1.270e-02 -3.500e-03]\n",
      " [-4.300e-03 -3.200e-03  1.455e-01  3.650e-02 -1.400e-02]\n",
      " [-5.000e-04  1.000e-04  1.795e-01  9.600e-03  3.000e-04]]\n",
      "mean_state_value -0.0011431669267964665\n",
      "episode 300/600\n",
      "p1 0.8408000000000002 p0 0.03979999999999995\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.290e-02 -7.730e-02 -7.670e-02 -2.030e-02 -3.580e-02]\n",
      " [-1.840e-02 -2.390e-02 -3.450e-02 -2.800e-02 -1.090e-02]\n",
      " [-3.600e-03 -1.700e-02  9.100e-03 -1.260e-02 -3.500e-03]\n",
      " [-4.300e-03 -3.200e-03  1.466e-01  3.660e-02 -1.390e-02]\n",
      " [-5.000e-04  1.000e-04  1.806e-01  9.800e-03  3.000e-04]]\n",
      "mean_state_value -0.0009719604787310923\n",
      "episode 301/600\n",
      "p1 0.8416000000000002 p0 0.03959999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.280e-02 -7.690e-02 -7.630e-02 -2.020e-02 -3.560e-02]\n",
      " [-1.830e-02 -2.380e-02 -3.440e-02 -2.790e-02 -1.090e-02]\n",
      " [-3.600e-03 -1.690e-02  9.200e-03 -1.260e-02 -3.500e-03]\n",
      " [-4.300e-03 -3.200e-03  1.477e-01  3.660e-02 -1.380e-02]\n",
      " [-5.000e-04  1.000e-04  1.816e-01  1.010e-02  4.000e-04]]\n",
      "mean_state_value -0.0007872625787018803\n",
      "episode 302/600\n",
      "p1 0.8424000000000003 p0 0.03939999999999995\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.270e-02 -7.660e-02 -7.590e-02 -2.010e-02 -3.550e-02]\n",
      " [-1.820e-02 -2.370e-02 -3.420e-02 -2.780e-02 -1.080e-02]\n",
      " [-3.600e-03 -1.680e-02  9.300e-03 -1.250e-02 -3.500e-03]\n",
      " [-4.200e-03 -3.100e-03  1.488e-01  3.670e-02 -1.380e-02]\n",
      " [-5.000e-04  1.000e-04  1.827e-01  1.030e-02  4.000e-04]]\n",
      "mean_state_value -0.0006023754195032267\n",
      "episode 303/600\n",
      "p1 0.8432000000000002 p0 0.03919999999999994\n",
      "trajectorySteps 19\n",
      "[[1 3 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.260e-02 -7.620e-02 -7.550e-02 -2.000e-02 -3.530e-02]\n",
      " [-1.810e-02 -2.360e-02 -3.400e-02 -2.760e-02 -1.080e-02]\n",
      " [-3.700e-03 -1.670e-02  9.400e-03 -1.240e-02 -3.500e-03]\n",
      " [-4.200e-03 -3.100e-03  1.500e-01  3.670e-02 -1.370e-02]\n",
      " [-5.000e-04  1.000e-04  1.837e-01  1.060e-02  4.000e-04]]\n",
      "mean_state_value -0.0004181262893858346\n",
      "episode 304/600\n",
      "p1 0.8440000000000002 p0 0.038999999999999944\n",
      "trajectorySteps 25\n",
      "[[1 1 1 1 1]\n",
      " [3 0 0 0 1]\n",
      " [3 1 0 1 4]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.240e-02 -7.580e-02 -7.510e-02 -1.990e-02 -3.510e-02]\n",
      " [-1.800e-02 -2.340e-02 -3.380e-02 -2.750e-02 -1.070e-02]\n",
      " [-3.600e-03 -1.660e-02  9.500e-03 -1.240e-02 -3.500e-03]\n",
      " [-4.200e-03 -3.100e-03  1.511e-01  3.680e-02 -1.360e-02]\n",
      " [-5.000e-04  1.000e-04  1.848e-01  1.080e-02  4.000e-04]]\n",
      "mean_state_value -0.00023444503918218024\n",
      "episode 305/600\n",
      "p1 0.8448000000000002 p0 0.038799999999999946\n",
      "trajectorySteps 18\n",
      "[[2 2 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0754 -0.0747 -0.0198 -0.0349]\n",
      " [-0.0179 -0.0233 -0.0337 -0.0273 -0.0107]\n",
      " [-0.0036 -0.0165  0.0095 -0.0123 -0.0034]\n",
      " [-0.0042 -0.0031  0.1522  0.0368 -0.0136]\n",
      " [-0.0005  0.0002  0.1859  0.011   0.0005]]\n",
      "mean_state_value -4.8332910806116404e-05\n",
      "episode 306/600\n",
      "p1 0.8456000000000002 p0 0.03859999999999995\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [2 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.075  -0.0743 -0.0197 -0.0347]\n",
      " [-0.0179 -0.0232 -0.0335 -0.0272 -0.0106]\n",
      " [-0.0036 -0.0165  0.0096 -0.0122 -0.0034]\n",
      " [-0.0042 -0.0031  0.1521  0.0369 -0.0135]\n",
      " [-0.0005  0.0002  0.1869  0.0113  0.0005]]\n",
      "mean_state_value 8.502808281931739e-05\n",
      "episode 307/600\n",
      "p1 0.8464000000000003 p0 0.03839999999999995\n",
      "trajectorySteps 20\n",
      "[[1 3 3 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.075  -0.074  -0.0196 -0.0346]\n",
      " [-0.0178 -0.0231 -0.0333 -0.027  -0.0106]\n",
      " [-0.0036 -0.0164  0.0097 -0.0122 -0.0034]\n",
      " [-0.0041 -0.0031  0.1533  0.0369 -0.0134]\n",
      " [-0.0005  0.0002  0.188   0.0116  0.0005]]\n",
      "mean_state_value 0.0002591897879702098\n",
      "episode 308/600\n",
      "p1 0.8472000000000002 p0 0.03819999999999994\n",
      "trajectorySteps 19\n",
      "[[2 1 2 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0746 -0.0736 -0.0195 -0.0344]\n",
      " [-0.0177 -0.023  -0.0331 -0.0269 -0.0105]\n",
      " [-0.0036 -0.0163  0.0098 -0.0121 -0.0034]\n",
      " [-0.0041 -0.0031  0.1544  0.037  -0.0133]\n",
      " [-0.0005  0.0002  0.1891  0.0118  0.0005]]\n",
      "mean_state_value 0.00044669447699141883\n",
      "episode 309/600\n",
      "p1 0.8480000000000002 p0 0.037999999999999944\n",
      "trajectorySteps 19\n",
      "[[2 2 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0745 -0.0732 -0.0194 -0.0342]\n",
      " [-0.0176 -0.0228 -0.033  -0.0268 -0.0105]\n",
      " [-0.0036 -0.0162  0.0099 -0.0121 -0.0034]\n",
      " [-0.0041 -0.003   0.1555  0.037  -0.0133]\n",
      " [-0.0005  0.0002  0.1901  0.0121  0.0006]]\n",
      "mean_state_value 0.0006200448164898335\n",
      "episode 310/600\n",
      "p1 0.8488000000000002 p0 0.037799999999999945\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 1 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0741 -0.0728 -0.0193 -0.034 ]\n",
      " [-0.0175 -0.0227 -0.0328 -0.0266 -0.0104]\n",
      " [-0.0036 -0.0161  0.0099 -0.012  -0.0034]\n",
      " [-0.0041 -0.003   0.1567  0.0371 -0.0132]\n",
      " [-0.0005  0.0004  0.1909  0.0123  0.0006]]\n",
      "mean_state_value 0.0008012844619261607\n",
      "episode 311/600\n",
      "p1 0.8496000000000002 p0 0.037599999999999946\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0737 -0.0724 -0.0192 -0.0338]\n",
      " [-0.0174 -0.0226 -0.0326 -0.0265 -0.0104]\n",
      " [-0.0036 -0.016   0.01   -0.0119 -0.0033]\n",
      " [-0.004  -0.003   0.1578  0.0371 -0.0131]\n",
      " [-0.0005  0.0004  0.1919  0.0126  0.0006]]\n",
      "mean_state_value 0.0009894718336009882\n",
      "episode 312/600\n",
      "p1 0.8504000000000003 p0 0.03739999999999995\n",
      "trajectorySteps 17\n",
      "[[2 2 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0215 -0.0733 -0.072  -0.0191 -0.0337]\n",
      " [-0.0173 -0.0225 -0.0324 -0.0263 -0.0103]\n",
      " [-0.0036 -0.0159  0.0101 -0.0119 -0.0033]\n",
      " [-0.004  -0.003   0.159   0.0372 -0.013 ]\n",
      " [-0.0005  0.0004  0.193   0.0129  0.0007]]\n",
      "mean_state_value 0.0011778333416132168\n",
      "episode 313/600\n",
      "p1 0.8512000000000002 p0 0.03719999999999994\n",
      "trajectorySteps 23\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 4]\n",
      " [0 0 1 2 4]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.073  -0.0716 -0.019  -0.0335]\n",
      " [-0.0172 -0.0223 -0.0323 -0.0262 -0.0102]\n",
      " [-0.0036 -0.0159  0.0102 -0.0118 -0.0033]\n",
      " [-0.004  -0.003   0.1601  0.0372 -0.013 ]\n",
      " [-0.0005  0.0004  0.1941  0.0128  0.0007]]\n",
      "mean_state_value 0.0013517775299827554\n",
      "episode 314/600\n",
      "p1 0.8520000000000002 p0 0.03699999999999994\n",
      "trajectorySteps 15\n",
      "[[2 2 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0213 -0.0726 -0.0713 -0.0189 -0.0333]\n",
      " [-0.0172 -0.0222 -0.0321 -0.0261 -0.0102]\n",
      " [-0.0036 -0.0158  0.0103 -0.0117 -0.0033]\n",
      " [-0.004  -0.003   0.1612  0.0383 -0.0133]\n",
      " [-0.0005  0.0004  0.1943  0.0129  0.0007]]\n",
      "mean_state_value 0.0015233703834752466\n",
      "episode 315/600\n",
      "p1 0.8528000000000002 p0 0.036799999999999944\n",
      "trajectorySteps 19\n",
      "[[2 2 1 1 1]\n",
      " [1 1 0 1 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0212 -0.0725 -0.0709 -0.0188 -0.0331]\n",
      " [-0.0171 -0.0221 -0.0319 -0.0259 -0.0101]\n",
      " [-0.0035 -0.0157  0.0104 -0.0117 -0.0033]\n",
      " [-0.004  -0.0029  0.1612  0.0383 -0.0132]\n",
      " [-0.0005  0.0004  0.1954  0.0131  0.0008]]\n",
      "mean_state_value 0.001651415787283196\n",
      "episode 316/600\n",
      "p1 0.8536000000000002 p0 0.036599999999999945\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 3]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0211 -0.0721 -0.0705 -0.0187 -0.033 ]\n",
      " [-0.017  -0.022  -0.0317 -0.0258 -0.0101]\n",
      " [-0.0035 -0.0156  0.0104 -0.0116 -0.0032]\n",
      " [-0.0039 -0.0029  0.1623  0.0384 -0.0131]\n",
      " [-0.0005  0.0004  0.1965  0.0134  0.0008]]\n",
      "mean_state_value 0.0018400774148309884\n",
      "episode 317/600\n",
      "p1 0.8544000000000003 p0 0.036399999999999946\n",
      "trajectorySteps 18\n",
      "[[1 2 2 1 0]\n",
      " [1 1 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0209 -0.072  -0.0701 -0.0186 -0.0328]\n",
      " [-0.0169 -0.0219 -0.0316 -0.0256 -0.01  ]\n",
      " [-0.0035 -0.0155  0.0105 -0.0115 -0.0032]\n",
      " [-0.0039 -0.0029  0.1635  0.0384 -0.013 ]\n",
      " [-0.0005  0.0004  0.1976  0.0137  0.0008]]\n",
      "mean_state_value 0.002015867397228222\n",
      "episode 318/600\n",
      "p1 0.8552000000000002 p0 0.03619999999999994\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0208 -0.0716 -0.0697 -0.0185 -0.0326]\n",
      " [-0.0168 -0.0217 -0.0314 -0.0255 -0.01  ]\n",
      " [-0.0035 -0.0154  0.0106 -0.0115 -0.0032]\n",
      " [-0.0039 -0.0029  0.1646  0.0385 -0.013 ]\n",
      " [-0.0005  0.0005  0.1987  0.0139  0.0009]]\n",
      "mean_state_value 0.002204285418370904\n",
      "episode 319/600\n",
      "p1 0.8560000000000002 p0 0.03599999999999994\n",
      "trajectorySteps 20\n",
      "[[0 1 1 1 2]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 1 3]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0712 -0.0693 -0.0184 -0.0324]\n",
      " [-0.0171 -0.0216 -0.0312 -0.0254 -0.0099]\n",
      " [-0.0035 -0.0153  0.0107 -0.0114 -0.0033]\n",
      " [-0.0039 -0.0029  0.1658  0.0385 -0.0129]\n",
      " [-0.0005  0.0005  0.1998  0.0142  0.0009]]\n",
      "mean_state_value 0.002379100813747933\n",
      "episode 320/600\n",
      "p1 0.8568000000000002 p0 0.03579999999999994\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0708 -0.069  -0.0183 -0.0322]\n",
      " [-0.017  -0.0215 -0.0311 -0.0252 -0.0099]\n",
      " [-0.0035 -0.0153  0.0108 -0.0114 -0.0032]\n",
      " [-0.0038 -0.0029  0.1669  0.0386 -0.0128]\n",
      " [-0.0005  0.0005  0.2009  0.0145  0.0009]]\n",
      "mean_state_value 0.0025692789004119693\n",
      "episode 321/600\n",
      "p1 0.8576000000000003 p0 0.035599999999999944\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 2 2]\n",
      " [0 0 1 1 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0704 -0.0686 -0.0182 -0.0321]\n",
      " [-0.0169 -0.0214 -0.0309 -0.0251 -0.0098]\n",
      " [-0.0035 -0.0152  0.0108 -0.0116 -0.0032]\n",
      " [-0.0038 -0.0028  0.1681  0.0386 -0.0127]\n",
      " [-0.0005  0.0005  0.2019  0.0148  0.001 ]]\n",
      "mean_state_value 0.002745629119536107\n",
      "episode 322/600\n",
      "p1 0.8584000000000003 p0 0.035399999999999945\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.07   -0.0682 -0.0181 -0.0319]\n",
      " [-0.0168 -0.0213 -0.0307 -0.0249 -0.0097]\n",
      " [-0.0034 -0.0151  0.0109 -0.0116 -0.0032]\n",
      " [-0.0038 -0.0028  0.1692  0.0387 -0.0127]\n",
      " [-0.0005  0.0005  0.203   0.015   0.001 ]]\n",
      "mean_state_value 0.002936235777147463\n",
      "episode 323/600\n",
      "p1 0.8592000000000002 p0 0.03519999999999994\n",
      "trajectorySteps 17\n",
      "[[1 2 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0203 -0.0696 -0.0678 -0.018  -0.0317]\n",
      " [-0.0167 -0.0211 -0.0305 -0.0248 -0.0097]\n",
      " [-0.0034 -0.015   0.011  -0.0115 -0.0032]\n",
      " [-0.0038 -0.0028  0.1704  0.0387 -0.0126]\n",
      " [-0.0005  0.0005  0.2041  0.0153  0.001 ]]\n",
      "mean_state_value 0.0031270175243045335\n",
      "episode 324/600\n",
      "p1 0.8600000000000002 p0 0.03499999999999994\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0202 -0.0692 -0.0674 -0.0179 -0.0315]\n",
      " [-0.0166 -0.021  -0.0304 -0.0247 -0.0096]\n",
      " [-0.0034 -0.0149  0.0111 -0.0114 -0.0032]\n",
      " [-0.0038 -0.0028  0.1716  0.0388 -0.0125]\n",
      " [-0.0005  0.0005  0.2052  0.0156  0.0011]]\n",
      "mean_state_value 0.00331673159440055\n",
      "episode 325/600\n",
      "p1 0.8608000000000002 p0 0.03479999999999994\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0201 -0.0688 -0.067  -0.0177 -0.0313]\n",
      " [-0.0165 -0.0209 -0.0302 -0.0245 -0.0096]\n",
      " [-0.0034 -0.0148  0.0112 -0.0114 -0.0032]\n",
      " [-0.0037 -0.0028  0.1727  0.0388 -0.0124]\n",
      " [-0.0005  0.0005  0.2063  0.0159  0.0011]]\n",
      "mean_state_value 0.0035066051268514324\n",
      "episode 326/600\n",
      "p1 0.8616000000000003 p0 0.03459999999999994\n",
      "trajectorySteps 19\n",
      "[[2 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [3 2 0 0 1]\n",
      " [0 1 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0684 -0.0666 -0.0176 -0.0312]\n",
      " [-0.0164 -0.0208 -0.03   -0.0244 -0.0095]\n",
      " [-0.0034 -0.0151  0.0113 -0.0113 -0.0032]\n",
      " [-0.0037 -0.0028  0.1739  0.0399 -0.0127]\n",
      " [-0.0004  0.0005  0.2066  0.016   0.0011]]\n",
      "mean_state_value 0.0036687022972142387\n",
      "episode 327/600\n",
      "p1 0.8624000000000003 p0 0.034399999999999945\n",
      "trajectorySteps 22\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 3]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.068  -0.0663 -0.0175 -0.031 ]\n",
      " [-0.0163 -0.0207 -0.0298 -0.0242 -0.0095]\n",
      " [-0.0034 -0.015   0.0113 -0.0112 -0.0031]\n",
      " [-0.0037 -0.0027  0.1751  0.0399 -0.0126]\n",
      " [-0.0004  0.0005  0.2077  0.0159  0.0012]]\n",
      "mean_state_value 0.003846905598412216\n",
      "episode 328/600\n",
      "p1 0.8632000000000002 p0 0.03419999999999994\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0677 -0.0659 -0.0174 -0.0308]\n",
      " [-0.0162 -0.0205 -0.0297 -0.0241 -0.0094]\n",
      " [-0.0033 -0.0149  0.0114 -0.0112 -0.0031]\n",
      " [-0.0037 -0.0027  0.1754  0.04   -0.0125]\n",
      " [-0.0004  0.0005  0.2088  0.0162  0.0012]]\n",
      "mean_state_value 0.004004678296467175\n",
      "episode 329/600\n",
      "p1 0.8640000000000002 p0 0.03399999999999994\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0196 -0.0673 -0.0655 -0.0173 -0.0306]\n",
      " [-0.0161 -0.0204 -0.0295 -0.0239 -0.0094]\n",
      " [-0.0033 -0.0148  0.0115 -0.0111 -0.0031]\n",
      " [-0.0037 -0.0027  0.1765  0.04   -0.0125]\n",
      " [-0.0004  0.0005  0.2099  0.0165  0.0013]]\n",
      "mean_state_value 0.004196703386417714\n",
      "episode 330/600\n",
      "p1 0.8648000000000002 p0 0.03379999999999994\n",
      "trajectorySteps 18\n",
      "[[1 1 2 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0195 -0.0669 -0.0651 -0.0172 -0.0304]\n",
      " [-0.016  -0.0203 -0.0293 -0.0238 -0.0093]\n",
      " [-0.0033 -0.0147  0.0116 -0.0111 -0.0031]\n",
      " [-0.0036 -0.0027  0.1777  0.0401 -0.0124]\n",
      " [-0.0004  0.0006  0.211   0.0167  0.0013]]\n",
      "mean_state_value 0.00438821395648554\n",
      "episode 331/600\n",
      "p1 0.8656000000000003 p0 0.03359999999999994\n",
      "trajectorySteps 20\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0194 -0.0665 -0.0647 -0.0171 -0.0303]\n",
      " [-0.0159 -0.0202 -0.0291 -0.0237 -0.0093]\n",
      " [-0.0033 -0.0147  0.0117 -0.011  -0.0031]\n",
      " [-0.0036 -0.0027  0.1789  0.0401 -0.0123]\n",
      " [-0.0004  0.0006  0.2121  0.017   0.0013]]\n",
      "mean_state_value 0.004580693228894726\n",
      "episode 332/600\n",
      "p1 0.8664000000000003 p0 0.033399999999999944\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0193 -0.0661 -0.0643 -0.017  -0.0301]\n",
      " [-0.0158 -0.0201 -0.029  -0.0235 -0.0092]\n",
      " [-0.0033 -0.0146  0.0117 -0.0109 -0.003 ]\n",
      " [-0.0036 -0.0027  0.1801  0.0402 -0.0122]\n",
      " [-0.0004  0.0006  0.2132  0.0173  0.0014]]\n",
      "mean_state_value 0.004773249784056718\n",
      "episode 333/600\n",
      "p1 0.8672000000000002 p0 0.03319999999999994\n",
      "trajectorySteps 15\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0192 -0.0657 -0.064  -0.0169 -0.0299]\n",
      " [-0.0157 -0.0199 -0.0288 -0.0234 -0.0091]\n",
      " [-0.0033 -0.0145  0.0118 -0.0109 -0.003 ]\n",
      " [-0.0036 -0.0027  0.1812  0.0412 -0.0125]\n",
      " [-0.0004  0.0006  0.2135  0.0174  0.0014]]\n",
      "mean_state_value 0.004950413011087609\n",
      "episode 334/600\n",
      "p1 0.8680000000000002 p0 0.03299999999999994\n",
      "trajectorySteps 18\n",
      "[[1 1 2 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0191 -0.0653 -0.0636 -0.0168 -0.0297]\n",
      " [-0.0156 -0.0198 -0.0286 -0.0232 -0.0091]\n",
      " [-0.0033 -0.0144  0.0119 -0.0108 -0.003 ]\n",
      " [-0.0035 -0.0026  0.1824  0.0413 -0.0124]\n",
      " [-0.0004  0.0006  0.2146  0.0177  0.0014]]\n",
      "mean_state_value 0.00514221408624052\n",
      "episode 335/600\n",
      "p1 0.8688000000000002 p0 0.03279999999999994\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0189 -0.0649 -0.0632 -0.0167 -0.0295]\n",
      " [-0.0156 -0.0197 -0.0285 -0.0231 -0.009 ]\n",
      " [-0.0032 -0.0143  0.012  -0.0107 -0.003 ]\n",
      " [-0.0035 -0.0026  0.1836  0.0414 -0.0123]\n",
      " [-0.0004  0.0006  0.2157  0.018   0.0014]]\n",
      "mean_state_value 0.005334176552231964\n",
      "episode 336/600\n",
      "p1 0.8696000000000003 p0 0.03259999999999994\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0188 -0.0645 -0.0628 -0.0166 -0.0294]\n",
      " [-0.0155 -0.0196 -0.0283 -0.023  -0.009 ]\n",
      " [-0.0033 -0.0142  0.0121 -0.0107 -0.003 ]\n",
      " [-0.0035 -0.0026  0.1848  0.0414 -0.0122]\n",
      " [-0.0004  0.0006  0.2168  0.0183  0.0015]]\n",
      "mean_state_value 0.005526359887865675\n",
      "episode 337/600\n",
      "p1 0.8704000000000003 p0 0.03239999999999994\n",
      "trajectorySteps 19\n",
      "[[1 1 1 2 4]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0187 -0.0641 -0.0624 -0.0165 -0.0292]\n",
      " [-0.0154 -0.0195 -0.0281 -0.0228 -0.0089]\n",
      " [-0.0033 -0.0141  0.0122 -0.0106 -0.003 ]\n",
      " [-0.0035 -0.0026  0.1859  0.0415 -0.0121]\n",
      " [-0.0004  0.0006  0.2179  0.0186  0.0015]]\n",
      "mean_state_value 0.005718162848909797\n",
      "episode 338/600\n",
      "p1 0.8712000000000002 p0 0.03219999999999994\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0186 -0.0637 -0.062  -0.0164 -0.029 ]\n",
      " [-0.0153 -0.0193 -0.0279 -0.0227 -0.0089]\n",
      " [-0.0032 -0.014   0.0122 -0.0105 -0.0029]\n",
      " [-0.0035 -0.0026  0.1871  0.0415 -0.0121]\n",
      " [-0.0004  0.0006  0.219   0.019   0.0016]]\n",
      "mean_state_value 0.005919813328679869\n",
      "episode 339/600\n",
      "p1 0.8720000000000002 p0 0.03199999999999994\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0185 -0.0633 -0.0616 -0.0163 -0.0288]\n",
      " [-0.0152 -0.0192 -0.0278 -0.0225 -0.0088]\n",
      " [-0.0032 -0.014   0.0123 -0.0105 -0.0029]\n",
      " [-0.0034 -0.0026  0.1883  0.0416 -0.012 ]\n",
      " [-0.0004  0.0006  0.2202  0.0193  0.0016]]\n",
      "mean_state_value 0.0061137258618594046\n",
      "episode 340/600\n",
      "p1 0.8728000000000002 p0 0.03179999999999994\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0184 -0.0629 -0.0613 -0.0162 -0.0287]\n",
      " [-0.0151 -0.0191 -0.0276 -0.0224 -0.0088]\n",
      " [-0.0032 -0.0139  0.0124 -0.0104 -0.0029]\n",
      " [-0.0034 -0.0025  0.1895  0.0416 -0.0119]\n",
      " [-0.0004  0.0006  0.2213  0.0196  0.0016]]\n",
      "mean_state_value 0.006305468517556001\n",
      "episode 341/600\n",
      "p1 0.8736000000000003 p0 0.03159999999999994\n",
      "trajectorySteps 16\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0182 -0.0625 -0.0609 -0.0161 -0.0285]\n",
      " [-0.015  -0.019  -0.0274 -0.0223 -0.0087]\n",
      " [-0.0032 -0.0138  0.0125 -0.0103 -0.0029]\n",
      " [-0.0034 -0.0025  0.1907  0.0417 -0.0118]\n",
      " [-0.0004  0.0006  0.2224  0.0199  0.0017]]\n",
      "mean_state_value 0.006499749088098462\n",
      "episode 342/600\n",
      "p1 0.8744000000000003 p0 0.03139999999999994\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0181 -0.0621 -0.0605 -0.016  -0.0283]\n",
      " [-0.0149 -0.0189 -0.0272 -0.0221 -0.0086]\n",
      " [-0.0032 -0.0137  0.0126 -0.0103 -0.0029]\n",
      " [-0.0034 -0.0025  0.1919  0.0417 -0.0118]\n",
      " [-0.0004  0.0007  0.2235  0.0202  0.0017]]\n",
      "mean_state_value 0.006692977078081759\n",
      "episode 343/600\n",
      "p1 0.8752000000000002 p0 0.03119999999999994\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.018  -0.0617 -0.0601 -0.0159 -0.0281]\n",
      " [-0.0148 -0.0187 -0.0271 -0.022  -0.0086]\n",
      " [-0.0032 -0.0136  0.0126 -0.0102 -0.0029]\n",
      " [-0.0034 -0.0025  0.1931  0.0428 -0.0117]\n",
      " [-0.0004  0.0007  0.2238  0.02    0.0017]]\n",
      "mean_state_value 0.006873995540672357\n",
      "episode 344/600\n",
      "p1 0.8760000000000002 p0 0.030999999999999937\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0179 -0.0613 -0.0597 -0.0158 -0.0279]\n",
      " [-0.0147 -0.0186 -0.0269 -0.0218 -0.0085]\n",
      " [-0.0031 -0.0135  0.0127 -0.0101 -0.0029]\n",
      " [-0.0033 -0.0025  0.1943  0.0429 -0.0116]\n",
      " [-0.0004  0.0007  0.2249  0.0203  0.0018]]\n",
      "mean_state_value 0.007068291307270639\n",
      "episode 345/600\n",
      "p1 0.8768000000000002 p0 0.03079999999999994\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0178 -0.0609 -0.0593 -0.0157 -0.0278]\n",
      " [-0.0146 -0.0185 -0.0267 -0.0217 -0.0085]\n",
      " [-0.0031 -0.0134  0.0128 -0.0101 -0.0028]\n",
      " [-0.0033 -0.0025  0.1943  0.0429 -0.0115]\n",
      " [-0.0004  0.0007  0.2261  0.0206  0.0018]]\n",
      "mean_state_value 0.0072163267499112125\n",
      "episode 346/600\n",
      "p1 0.8776000000000003 p0 0.03059999999999994\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0177 -0.0605 -0.059  -0.0156 -0.0276]\n",
      " [-0.0145 -0.0184 -0.0265 -0.0216 -0.0084]\n",
      " [-0.0031 -0.0133  0.0129 -0.01   -0.0028]\n",
      " [-0.0033 -0.0024  0.1955  0.043  -0.0114]\n",
      " [-0.0004  0.0007  0.2272  0.0209  0.0018]]\n",
      "mean_state_value 0.0074097512210293615\n",
      "episode 347/600\n",
      "p1 0.8784000000000003 p0 0.030399999999999937\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0176 -0.0601 -0.0586 -0.0155 -0.0274]\n",
      " [-0.0144 -0.0183 -0.0264 -0.0214 -0.0084]\n",
      " [-0.0031 -0.0133  0.013  -0.0099 -0.0028]\n",
      " [-0.0033 -0.0024  0.1967  0.043  -0.0114]\n",
      " [-0.0004  0.0007  0.2283  0.0212  0.0018]]\n",
      "mean_state_value 0.007604044636083222\n",
      "episode 348/600\n",
      "p1 0.8792000000000002 p0 0.03019999999999994\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 2]\n",
      " [2 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0175 -0.0597 -0.0582 -0.0154 -0.0272]\n",
      " [-0.0143 -0.0181 -0.0262 -0.0213 -0.0083]\n",
      " [-0.0031 -0.0132  0.0131 -0.0099 -0.0028]\n",
      " [-0.0032 -0.0024  0.1979  0.0431 -0.0113]\n",
      " [-0.0004  0.0007  0.2294  0.0215  0.0019]]\n",
      "mean_state_value 0.007799692140255557\n",
      "episode 349/600\n",
      "p1 0.8800000000000002 p0 0.029999999999999936\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0173 -0.0593 -0.0578 -0.0153 -0.027 ]\n",
      " [-0.0142 -0.018  -0.026  -0.0211 -0.0083]\n",
      " [-0.003  -0.0131  0.0131 -0.0098 -0.0028]\n",
      " [-0.0032 -0.0024  0.1991  0.0431 -0.0112]\n",
      " [-0.0004  0.0007  0.2306  0.0218  0.0019]]\n",
      "mean_state_value 0.00799551700408379\n",
      "episode 350/600\n",
      "p1 0.8808000000000002 p0 0.029799999999999938\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0172 -0.0589 -0.0574 -0.0152 -0.0269]\n",
      " [-0.0142 -0.0179 -0.0258 -0.021  -0.0082]\n",
      " [-0.003  -0.013   0.0132 -0.0097 -0.0027]\n",
      " [-0.0032 -0.0024  0.2003  0.0432 -0.0111]\n",
      " [-0.0004  0.0007  0.2317  0.0221  0.002 ]]\n",
      "mean_state_value 0.008191537646101733\n",
      "episode 351/600\n",
      "p1 0.8816000000000003 p0 0.02959999999999994\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0586 -0.057  -0.0151 -0.0267]\n",
      " [-0.0141 -0.0178 -0.0257 -0.0208 -0.0082]\n",
      " [-0.003  -0.0129  0.0133 -0.0097 -0.0027]\n",
      " [-0.0032 -0.0024  0.2015  0.0432 -0.0111]\n",
      " [-0.0004  0.0007  0.2329  0.0224  0.002 ]]\n",
      "mean_state_value 0.008387726163643217\n",
      "episode 352/600\n",
      "p1 0.8824000000000003 p0 0.029399999999999937\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 2]\n",
      " [2 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0582 -0.0566 -0.015  -0.0265]\n",
      " [-0.014  -0.0177 -0.0255 -0.0207 -0.0081]\n",
      " [-0.003  -0.0128  0.0134 -0.0096 -0.0027]\n",
      " [-0.0032 -0.0023  0.2027  0.0433 -0.011 ]\n",
      " [-0.0004  0.0007  0.234   0.0227  0.0021]]\n",
      "mean_state_value 0.008583025106797603\n",
      "episode 353/600\n",
      "p1 0.8832000000000002 p0 0.029199999999999938\n",
      "trajectorySteps 18\n",
      "[[0 2 2 1 1]\n",
      " [1 1 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0578 -0.0563 -0.0149 -0.0263]\n",
      " [-0.0142 -0.0175 -0.0253 -0.0206 -0.008 ]\n",
      " [-0.003  -0.0127  0.0135 -0.0095 -0.0027]\n",
      " [-0.0031 -0.0023  0.2039  0.0433 -0.0109]\n",
      " [-0.0004  0.0007  0.2351  0.023   0.0021]]\n",
      "mean_state_value 0.008767803590327843\n",
      "episode 354/600\n",
      "p1 0.8840000000000002 p0 0.028999999999999936\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0574 -0.0559 -0.0148 -0.0261]\n",
      " [-0.0141 -0.0174 -0.0252 -0.0204 -0.008 ]\n",
      " [-0.0029 -0.0126  0.0135 -0.0095 -0.0027]\n",
      " [-0.0031 -0.0023  0.2051  0.0434 -0.0108]\n",
      " [-0.0004  0.0007  0.2363  0.0233  0.0022]]\n",
      "mean_state_value 0.008964928550748057\n",
      "episode 355/600\n",
      "p1 0.8848000000000003 p0 0.028799999999999937\n",
      "trajectorySteps 19\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0572 -0.0555 -0.0147 -0.026 ]\n",
      " [-0.014  -0.0173 -0.025  -0.0203 -0.0079]\n",
      " [-0.0029 -0.0126  0.0136 -0.0094 -0.0027]\n",
      " [-0.0031 -0.0023  0.2063  0.0434 -0.0108]\n",
      " [-0.0004  0.0008  0.2374  0.0236  0.0022]]\n",
      "mean_state_value 0.009151020942569343\n",
      "episode 356/600\n",
      "p1 0.8856000000000003 p0 0.028599999999999938\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0568 -0.0551 -0.0146 -0.0258]\n",
      " [-0.0139 -0.0172 -0.0248 -0.0201 -0.0079]\n",
      " [-0.0029 -0.0125  0.0137 -0.0094 -0.0027]\n",
      " [-0.0031 -0.0023  0.2064  0.0435 -0.0107]\n",
      " [-0.0004  0.0008  0.2386  0.0239  0.0022]]\n",
      "mean_state_value 0.009300692177163181\n",
      "episode 357/600\n",
      "p1 0.8864000000000003 p0 0.028399999999999936\n",
      "trajectorySteps 14\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0564 -0.0547 -0.0145 -0.0256]\n",
      " [-0.0141 -0.0171 -0.0246 -0.02   -0.0078]\n",
      " [-0.0029 -0.0124  0.0138 -0.0093 -0.0026]\n",
      " [-0.0031 -0.0023  0.2076  0.0435 -0.0106]\n",
      " [-0.0004  0.0008  0.2397  0.0243  0.0023]]\n",
      "mean_state_value 0.009487268039920695\n",
      "episode 358/600\n",
      "p1 0.8872000000000002 p0 0.028199999999999937\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.056  -0.0543 -0.0144 -0.0254]\n",
      " [-0.014  -0.0169 -0.0245 -0.0199 -0.0078]\n",
      " [-0.0029 -0.0123  0.0139 -0.0092 -0.0026]\n",
      " [-0.003  -0.0023  0.2088  0.0436 -0.0105]\n",
      " [-0.0004  0.0008  0.2409  0.0246  0.0023]]\n",
      "mean_state_value 0.009683881132853291\n",
      "episode 359/600\n",
      "p1 0.8880000000000002 p0 0.027999999999999935\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0162 -0.0556 -0.054  -0.0143 -0.0252]\n",
      " [-0.0139 -0.0168 -0.0243 -0.0197 -0.0077]\n",
      " [-0.0028 -0.0122  0.014  -0.0092 -0.0026]\n",
      " [-0.003  -0.0022  0.21    0.0437 -0.0105]\n",
      " [-0.0004  0.0008  0.242   0.0249  0.0024]]\n",
      "mean_state_value 0.009881757349599934\n",
      "episode 360/600\n",
      "p1 0.8888000000000003 p0 0.027799999999999936\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0161 -0.0552 -0.0536 -0.0142 -0.0251]\n",
      " [-0.0138 -0.0167 -0.0241 -0.0196 -0.0077]\n",
      " [-0.0028 -0.0121  0.014  -0.0091 -0.0026]\n",
      " [-0.003  -0.0022  0.2112  0.0437 -0.0104]\n",
      " [-0.0004  0.0008  0.2432  0.0252  0.0024]]\n",
      "mean_state_value 0.010077804649249247\n",
      "episode 361/600\n",
      "p1 0.8896000000000003 p0 0.027599999999999937\n",
      "trajectorySteps 20\n",
      "[[1 1 1 2 1]\n",
      " [2 0 0 2 3]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.016  -0.0548 -0.0532 -0.0141 -0.0249]\n",
      " [-0.0137 -0.0166 -0.0239 -0.0194 -0.0076]\n",
      " [-0.0028 -0.012   0.0141 -0.009  -0.0026]\n",
      " [-0.003  -0.0022  0.2125  0.0438 -0.0103]\n",
      " [-0.0004  0.0008  0.2443  0.0255  0.0024]]\n",
      "mean_state_value 0.010276052107206978\n",
      "episode 362/600\n",
      "p1 0.8904000000000003 p0 0.027399999999999935\n",
      "trajectorySteps 18\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.0544 -0.053  -0.014  -0.0247]\n",
      " [-0.0136 -0.0165 -0.0238 -0.0193 -0.0075]\n",
      " [-0.0028 -0.0119  0.0142 -0.009  -0.0026]\n",
      " [-0.0029 -0.0022  0.2137  0.0438 -0.0102]\n",
      " [-0.0004  0.0008  0.2455  0.0258  0.0025]]\n",
      "mean_state_value 0.010465225628615016\n",
      "episode 363/600\n",
      "p1 0.8912000000000002 p0 0.027199999999999936\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.054  -0.0526 -0.0139 -0.0245]\n",
      " [-0.0135 -0.0163 -0.0236 -0.0192 -0.0075]\n",
      " [-0.0028 -0.0119  0.0143 -0.0089 -0.0025]\n",
      " [-0.0029 -0.0022  0.2149  0.0439 -0.0101]\n",
      " [-0.0004  0.0008  0.2466  0.0261  0.0025]]\n",
      "mean_state_value 0.010663898707424657\n",
      "episode 364/600\n",
      "p1 0.8920000000000002 p0 0.026999999999999934\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0536 -0.0523 -0.0138 -0.0243]\n",
      " [-0.0134 -0.0162 -0.0234 -0.019  -0.0074]\n",
      " [-0.0028 -0.0118  0.0144 -0.0088 -0.0025]\n",
      " [-0.0029 -0.0022  0.2161  0.0439 -0.01  ]\n",
      " [-0.0004  0.0008  0.2478  0.0264  0.0026]]\n",
      "mean_state_value 0.010861746073476519\n",
      "episode 365/600\n",
      "p1 0.8928000000000003 p0 0.026799999999999935\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0155 -0.0532 -0.0519 -0.0137 -0.0242]\n",
      " [-0.0133 -0.0161 -0.0232 -0.0189 -0.0074]\n",
      " [-0.0027 -0.0117  0.0144 -0.0088 -0.0025]\n",
      " [-0.0029 -0.0021  0.2162  0.044  -0.01  ]\n",
      " [-0.0003  0.0008  0.2489  0.0268  0.0026]]\n",
      "mean_state_value 0.011014686590338356\n",
      "episode 366/600\n",
      "p1 0.8936000000000003 p0 0.026599999999999936\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0154 -0.0528 -0.0515 -0.0136 -0.024 ]\n",
      " [-0.0132 -0.016  -0.0231 -0.0187 -0.0073]\n",
      " [-0.0027 -0.0116  0.0145 -0.0087 -0.0025]\n",
      " [-0.0029 -0.0021  0.2174  0.044  -0.0099]\n",
      " [-0.0003  0.0008  0.2501  0.0271  0.0027]]\n",
      "mean_state_value 0.011212959515104927\n",
      "episode 367/600\n",
      "p1 0.8944000000000003 p0 0.026399999999999934\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0153 -0.0524 -0.0511 -0.0135 -0.0238]\n",
      " [-0.0131 -0.0159 -0.0229 -0.0186 -0.0073]\n",
      " [-0.0028 -0.0115  0.0146 -0.0086 -0.0025]\n",
      " [-0.0028 -0.0021  0.2187  0.0441 -0.0098]\n",
      " [-0.0003  0.0009  0.2513  0.0274  0.0027]]\n",
      "mean_state_value 0.011411426643078482\n",
      "episode 368/600\n",
      "p1 0.8952000000000002 p0 0.026199999999999935\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 3 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0152 -0.0521 -0.0507 -0.0134 -0.0236]\n",
      " [-0.013  -0.0157 -0.0227 -0.0185 -0.0072]\n",
      " [-0.0028 -0.0114  0.0147 -0.0086 -0.0024]\n",
      " [-0.0028 -0.0021  0.2199  0.0441 -0.0097]\n",
      " [-0.0003  0.0009  0.2524  0.0277  0.0028]]\n",
      "mean_state_value 0.011607709634605006\n",
      "episode 369/600\n",
      "p1 0.8960000000000002 p0 0.025999999999999933\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.015  -0.0517 -0.0503 -0.0133 -0.0234]\n",
      " [-0.0129 -0.0156 -0.0226 -0.0183 -0.0072]\n",
      " [-0.0028 -0.0113  0.0148 -0.0085 -0.0024]\n",
      " [-0.0028 -0.0021  0.2211  0.0442 -0.0096]\n",
      " [-0.0003  0.0009  0.2535  0.028   0.0028]]\n",
      "mean_state_value 0.011807514218103514\n",
      "episode 370/600\n",
      "p1 0.8968000000000003 p0 0.025799999999999934\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0149 -0.0513 -0.0499 -0.0132 -0.0233]\n",
      " [-0.0128 -0.0155 -0.0224 -0.0182 -0.0071]\n",
      " [-0.0027 -0.0113  0.0149 -0.0084 -0.0024]\n",
      " [-0.0028 -0.0021  0.2224  0.0442 -0.0096]\n",
      " [-0.0003  0.0009  0.2547  0.0284  0.0028]]\n",
      "mean_state_value 0.012007497971673123\n",
      "episode 371/600\n",
      "p1 0.8976000000000003 p0 0.025599999999999935\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0148 -0.0509 -0.0496 -0.0131 -0.0231]\n",
      " [-0.0127 -0.0154 -0.0222 -0.018  -0.007 ]\n",
      " [-0.0027 -0.0112  0.0149 -0.0084 -0.0024]\n",
      " [-0.0028 -0.002   0.2236  0.0443 -0.0095]\n",
      " [-0.0003  0.0009  0.2559  0.0287  0.0029]]\n",
      "mean_state_value 0.012206660747079733\n",
      "episode 372/600\n",
      "p1 0.8984000000000003 p0 0.025399999999999933\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0147 -0.0505 -0.0492 -0.013  -0.0229]\n",
      " [-0.0126 -0.0153 -0.022  -0.0179 -0.007 ]\n",
      " [-0.0027 -0.0111  0.015  -0.0083 -0.0024]\n",
      " [-0.0027 -0.002   0.2248  0.0443 -0.0094]\n",
      " [-0.0003  0.0009  0.257   0.029   0.0029]]\n",
      "mean_state_value 0.012407010931343894\n",
      "episode 373/600\n",
      "p1 0.8992000000000002 p0 0.025199999999999934\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0146 -0.0501 -0.0488 -0.0129 -0.0227]\n",
      " [-0.0125 -0.0151 -0.0219 -0.0177 -0.0069]\n",
      " [-0.0027 -0.011   0.0151 -0.0082 -0.0024]\n",
      " [-0.0027 -0.002   0.2261  0.0444 -0.0093]\n",
      " [-0.0003  0.0009  0.2582  0.0294  0.0029]]\n",
      "mean_state_value 0.01260516639039195\n",
      "episode 374/600\n",
      "p1 0.9000000000000002 p0 0.024999999999999932\n",
      "trajectorySteps 18\n",
      "[[0 2 1 1 1]\n",
      " [1 2 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0145 -0.0499 -0.0484 -0.0128 -0.0225]\n",
      " [-0.0126 -0.015  -0.0217 -0.0176 -0.0069]\n",
      " [-0.0027 -0.0109  0.0152 -0.0082 -0.0023]\n",
      " [-0.0027 -0.002   0.2273  0.0444 -0.0092]\n",
      " [-0.0003  0.0009  0.2594  0.0297  0.003 ]]\n",
      "mean_state_value 0.012787857477992303\n",
      "episode 375/600\n",
      "p1 0.9008000000000003 p0 0.024799999999999933\n",
      "trajectorySteps 18\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0144 -0.0495 -0.048  -0.0127 -0.0224]\n",
      " [-0.0125 -0.0149 -0.0215 -0.0175 -0.0068]\n",
      " [-0.0027 -0.0108  0.0153 -0.0081 -0.0023]\n",
      " [-0.0027 -0.002   0.2286  0.0445 -0.0092]\n",
      " [-0.0003  0.0009  0.2606  0.03    0.003 ]]\n",
      "mean_state_value 0.012988909417477192\n",
      "episode 376/600\n",
      "p1 0.9016000000000003 p0 0.024599999999999934\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0142 -0.0491 -0.0476 -0.0126 -0.0222]\n",
      " [-0.0124 -0.0148 -0.0213 -0.0173 -0.0068]\n",
      " [-0.0026 -0.0107  0.0153 -0.008  -0.0023]\n",
      " [-0.0027 -0.002   0.2289  0.0446 -0.0091]\n",
      " [-0.0003  0.0009  0.2617  0.0303  0.0031]]\n",
      "mean_state_value 0.013152475265870524\n",
      "episode 377/600\n",
      "p1 0.9024000000000003 p0 0.024399999999999932\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [3 2 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0141 -0.0487 -0.0472 -0.0125 -0.022 ]\n",
      " [-0.0123 -0.0147 -0.0212 -0.0172 -0.0067]\n",
      " [-0.0026 -0.0106  0.0154 -0.008  -0.0023]\n",
      " [-0.0027 -0.0019  0.2302  0.0446 -0.009 ]\n",
      " [-0.0003  0.0009  0.2629  0.0307  0.0031]]\n",
      "mean_state_value 0.01335383306566663\n",
      "episode 378/600\n",
      "p1 0.9032000000000002 p0 0.024199999999999933\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.014  -0.0483 -0.0468 -0.0124 -0.0218]\n",
      " [-0.0122 -0.0145 -0.021  -0.017  -0.0067]\n",
      " [-0.0026 -0.0106  0.0155 -0.0079 -0.0023]\n",
      " [-0.0027 -0.0019  0.2314  0.0447 -0.0089]\n",
      " [-0.0003  0.0009  0.2641  0.031   0.0032]]\n",
      "mean_state_value 0.01355537065531609\n",
      "episode 379/600\n",
      "p1 0.9040000000000002 p0 0.02399999999999993\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [3 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0139 -0.0479 -0.0465 -0.0123 -0.0216]\n",
      " [-0.0121 -0.0144 -0.0208 -0.0169 -0.0066]\n",
      " [-0.0026 -0.0105  0.0156 -0.0078 -0.0022]\n",
      " [-0.0026 -0.0019  0.2327  0.0447 -0.0088]\n",
      " [-0.0003  0.001   0.2653  0.0313  0.0032]]\n",
      "mean_state_value 0.013756172278852093\n",
      "episode 380/600\n",
      "p1 0.9048000000000003 p0 0.023799999999999932\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0138 -0.0475 -0.0461 -0.0122 -0.0215]\n",
      " [-0.012  -0.0143 -0.0206 -0.0168 -0.0066]\n",
      " [-0.0026 -0.0104  0.0157 -0.0078 -0.0022]\n",
      " [-0.0026 -0.0019  0.2339  0.0448 -0.0088]\n",
      " [-0.0003  0.001   0.2664  0.0317  0.0033]]\n",
      "mean_state_value 0.013957227234153985\n",
      "episode 381/600\n",
      "p1 0.9056000000000003 p0 0.023599999999999934\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0137 -0.0471 -0.0457 -0.0121 -0.0213]\n",
      " [-0.0119 -0.0142 -0.0205 -0.0166 -0.0065]\n",
      " [-0.0025 -0.0103  0.0158 -0.0077 -0.0022]\n",
      " [-0.0026 -0.0019  0.2352  0.0448 -0.0087]\n",
      " [-0.0003  0.001   0.2676  0.032   0.0033]]\n",
      "mean_state_value 0.01415848415678427\n",
      "episode 382/600\n",
      "p1 0.9064000000000003 p0 0.02339999999999993\n",
      "trajectorySteps 20\n",
      "[[2 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 3]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0136 -0.0467 -0.0453 -0.012  -0.0211]\n",
      " [-0.0118 -0.0141 -0.0203 -0.0165 -0.0064]\n",
      " [-0.0025 -0.0102  0.0158 -0.0077 -0.0022]\n",
      " [-0.0026 -0.0019  0.2364  0.0449 -0.0086]\n",
      " [-0.0003  0.001   0.2688  0.0323  0.0034]]\n",
      "mean_state_value 0.014359916222786926\n",
      "episode 383/600\n",
      "p1 0.9072000000000002 p0 0.023199999999999932\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0463 -0.0449 -0.0119 -0.0209]\n",
      " [-0.0117 -0.0139 -0.0201 -0.0163 -0.0064]\n",
      " [-0.0025 -0.0101  0.0159 -0.0076 -0.0022]\n",
      " [-0.0025 -0.0019  0.2377  0.0449 -0.0085]\n",
      " [-0.0003  0.001   0.27    0.0327  0.0034]]\n",
      "mean_state_value 0.014562383516752105\n",
      "episode 384/600\n",
      "p1 0.9080000000000003 p0 0.02299999999999993\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0133 -0.0459 -0.0445 -0.0118 -0.0207]\n",
      " [-0.0116 -0.0138 -0.02   -0.0162 -0.0063]\n",
      " [-0.0025 -0.01    0.016  -0.0075 -0.0022]\n",
      " [-0.0025 -0.0018  0.239   0.045  -0.0084]\n",
      " [-0.0003  0.001   0.2712  0.033   0.0035]]\n",
      "mean_state_value 0.014763941777192556\n",
      "episode 385/600\n",
      "p1 0.9088000000000003 p0 0.02279999999999993\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0455 -0.0441 -0.0117 -0.0206]\n",
      " [-0.0115 -0.0137 -0.0198 -0.0161 -0.0063]\n",
      " [-0.0025 -0.0099  0.0161 -0.0075 -0.0021]\n",
      " [-0.0025 -0.0018  0.2402  0.045  -0.0084]\n",
      " [-0.0003  0.001   0.2724  0.0334  0.0035]]\n",
      "mean_state_value 0.014966779494048123\n",
      "episode 386/600\n",
      "p1 0.9096000000000003 p0 0.022599999999999933\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0131 -0.0451 -0.0437 -0.0115 -0.0204]\n",
      " [-0.0114 -0.0136 -0.0196 -0.0159 -0.0062]\n",
      " [-0.0024 -0.0099  0.0162 -0.0074 -0.0021]\n",
      " [-0.0025 -0.0018  0.2415  0.0451 -0.0083]\n",
      " [-0.0003  0.001   0.2736  0.0337  0.0036]]\n",
      "mean_state_value 0.015169797725825123\n",
      "episode 387/600\n",
      "p1 0.9104000000000003 p0 0.02239999999999993\n",
      "trajectorySteps 4\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 1 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.013  -0.0447 -0.0434 -0.0114 -0.0202]\n",
      " [-0.0113 -0.0135 -0.0194 -0.0158 -0.0062]\n",
      " [-0.0024 -0.01    0.0174 -0.0073 -0.0021]\n",
      " [-0.0025 -0.0018  0.2427  0.0451 -0.0082]\n",
      " [-0.0003  0.001   0.2739  0.0338  0.0036]]\n",
      "mean_state_value 0.015363300195187559\n",
      "episode 388/600\n",
      "p1 0.9112000000000002 p0 0.02219999999999993\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0129 -0.0443 -0.043  -0.0113 -0.02  ]\n",
      " [-0.0112 -0.0133 -0.0193 -0.0156 -0.0061]\n",
      " [-0.0024 -0.0099  0.0175 -0.0073 -0.0021]\n",
      " [-0.0024 -0.0018  0.2429  0.0452 -0.0081]\n",
      " [-0.0003  0.001   0.2751  0.0342  0.0036]]\n",
      "mean_state_value 0.015520843386235555\n",
      "episode 389/600\n",
      "p1 0.9120000000000003 p0 0.02199999999999993\n",
      "trajectorySteps 21\n",
      "[[2 3 2 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0128 -0.0439 -0.0426 -0.0112 -0.0198]\n",
      " [-0.0111 -0.0132 -0.0191 -0.0155 -0.0061]\n",
      " [-0.0024 -0.0098  0.0176 -0.0072 -0.0021]\n",
      " [-0.0024 -0.0018  0.2441  0.0452 -0.008 ]\n",
      " [-0.0003  0.001   0.2762  0.0345  0.0036]]\n",
      "mean_state_value 0.015723715906571692\n",
      "episode 390/600\n",
      "p1 0.9128000000000003 p0 0.02179999999999993\n",
      "trajectorySteps 20\n",
      "[[2 2 1 2 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0435 -0.0422 -0.0111 -0.0197]\n",
      " [-0.011  -0.0131 -0.0189 -0.0154 -0.006 ]\n",
      " [-0.0024 -0.0097  0.0176 -0.0071 -0.002 ]\n",
      " [-0.0024 -0.0017  0.2454  0.0453 -0.008 ]\n",
      " [-0.0003  0.001   0.2774  0.0348  0.0037]]\n",
      "mean_state_value 0.015927561526835114\n",
      "episode 391/600\n",
      "p1 0.9136000000000003 p0 0.021599999999999932\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0125 -0.0431 -0.0418 -0.011  -0.0195]\n",
      " [-0.0109 -0.013  -0.0187 -0.0152 -0.0059]\n",
      " [-0.0023 -0.0096  0.0177 -0.0071 -0.002 ]\n",
      " [-0.0024 -0.0017  0.2467  0.0453 -0.0079]\n",
      " [-0.0003  0.001   0.2786  0.0352  0.0037]]\n",
      "mean_state_value 0.016131588125981314\n",
      "episode 392/600\n",
      "p1 0.9144000000000003 p0 0.02139999999999993\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0124 -0.0427 -0.0414 -0.0109 -0.0193]\n",
      " [-0.0108 -0.0129 -0.0186 -0.0151 -0.0059]\n",
      " [-0.0023 -0.0095  0.0178 -0.007  -0.002 ]\n",
      " [-0.0023 -0.0017  0.248   0.0454 -0.0078]\n",
      " [-0.0003  0.0011  0.2798  0.0355  0.0038]]\n",
      "mean_state_value 0.01633541818468378\n",
      "episode 393/600\n",
      "p1 0.9152000000000002 p0 0.02119999999999993\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0123 -0.0423 -0.041  -0.0108 -0.0191]\n",
      " [-0.0107 -0.0127 -0.0184 -0.0149 -0.0058]\n",
      " [-0.0023 -0.0095  0.0179 -0.0069 -0.002 ]\n",
      " [-0.0023 -0.0017  0.2492  0.0454 -0.0077]\n",
      " [-0.0003  0.0011  0.281   0.0359  0.0038]]\n",
      "mean_state_value 0.016539810544864742\n",
      "episode 394/600\n",
      "p1 0.9160000000000003 p0 0.02099999999999993\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0122 -0.0419 -0.0406 -0.0107 -0.019 ]\n",
      " [-0.0107 -0.0126 -0.0182 -0.0148 -0.0058]\n",
      " [-0.0023 -0.0094  0.018  -0.0069 -0.002 ]\n",
      " [-0.0023 -0.0017  0.2505  0.0455 -0.0076]\n",
      " [-0.0003  0.0011  0.2822  0.0362  0.0039]]\n",
      "mean_state_value 0.016743583546664033\n",
      "episode 395/600\n",
      "p1 0.9168000000000003 p0 0.02079999999999993\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0121 -0.0415 -0.0403 -0.0106 -0.0188]\n",
      " [-0.0105 -0.0125 -0.018  -0.0147 -0.0057]\n",
      " [-0.0023 -0.0093  0.018  -0.0068 -0.0019]\n",
      " [-0.0023 -0.0017  0.2518  0.0456 -0.0075]\n",
      " [-0.0003  0.0011  0.2834  0.0366  0.0039]]\n",
      "mean_state_value 0.016948346133229934\n",
      "episode 396/600\n",
      "p1 0.9176000000000003 p0 0.02059999999999993\n",
      "trajectorySteps 19\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0119 -0.0411 -0.0399 -0.0105 -0.0186]\n",
      " [-0.0104 -0.0124 -0.0179 -0.0145 -0.0057]\n",
      " [-0.0022 -0.0092  0.0181 -0.0067 -0.0019]\n",
      " [-0.0023 -0.0016  0.2531  0.0456 -0.0075]\n",
      " [-0.0003  0.0011  0.2846  0.0369  0.004 ]]\n",
      "mean_state_value 0.017153637614902777\n",
      "episode 397/600\n",
      "p1 0.9184000000000003 p0 0.02039999999999993\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0118 -0.0407 -0.0395 -0.0104 -0.0184]\n",
      " [-0.0103 -0.0123 -0.0177 -0.0144 -0.0056]\n",
      " [-0.0022 -0.0091  0.0182 -0.0067 -0.0019]\n",
      " [-0.0022 -0.0016  0.2543  0.0457 -0.0074]\n",
      " [-0.0003  0.0011  0.2858  0.0373  0.0041]]\n",
      "mean_state_value 0.01735877616614306\n",
      "episode 398/600\n",
      "p1 0.9192000000000002 p0 0.02019999999999993\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0117 -0.0403 -0.0391 -0.0103 -0.0182]\n",
      " [-0.0102 -0.0121 -0.0175 -0.0142 -0.0056]\n",
      " [-0.0022 -0.009   0.0183 -0.0066 -0.0019]\n",
      " [-0.0022 -0.0016  0.2556  0.0457 -0.0073]\n",
      " [-0.0003  0.0011  0.2871  0.0376  0.0041]]\n",
      "mean_state_value 0.017563296383667465\n",
      "episode 399/600\n",
      "p1 0.9200000000000003 p0 0.019999999999999928\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0116 -0.0399 -0.0387 -0.0102 -0.018 ]\n",
      " [-0.0101 -0.012  -0.0173 -0.0141 -0.0055]\n",
      " [-0.0022 -0.0089  0.0184 -0.0065 -0.0019]\n",
      " [-0.0022 -0.0016  0.2569  0.0458 -0.0072]\n",
      " [-0.0003  0.0011  0.2883  0.038   0.0042]]\n",
      "mean_state_value 0.01776880619230586\n",
      "episode 400/600\n",
      "p1 0.9208000000000003 p0 0.01979999999999993\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0115 -0.0395 -0.0383 -0.0101 -0.0179]\n",
      " [-0.01   -0.0119 -0.0172 -0.0139 -0.0055]\n",
      " [-0.0022 -0.0088  0.0185 -0.0065 -0.0019]\n",
      " [-0.0022 -0.0016  0.2582  0.0458 -0.0071]\n",
      " [-0.0003  0.0011  0.2895  0.0384  0.0042]]\n",
      "mean_state_value 0.01797450218452438\n",
      "episode 401/600\n",
      "p1 0.9216000000000003 p0 0.01959999999999993\n",
      "trajectorySteps 20\n",
      "[[1 3 3 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0114 -0.0391 -0.0379 -0.01   -0.0177]\n",
      " [-0.0099 -0.0118 -0.017  -0.0138 -0.0054]\n",
      " [-0.0021 -0.0087  0.0185 -0.0064 -0.0018]\n",
      " [-0.0022 -0.0016  0.2586  0.0459 -0.0071]\n",
      " [-0.0003  0.0011  0.2907  0.0387  0.0043]]\n",
      "mean_state_value 0.018143830518737712\n",
      "episode 402/600\n",
      "p1 0.9224000000000003 p0 0.019399999999999928\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0112 -0.0387 -0.0376 -0.0099 -0.0175]\n",
      " [-0.0098 -0.0117 -0.0168 -0.0137 -0.0053]\n",
      " [-0.0021 -0.0087  0.0186 -0.0063 -0.0018]\n",
      " [-0.0021 -0.0015  0.2598  0.0459 -0.007 ]\n",
      " [-0.0003  0.0011  0.2919  0.0391  0.0043]]\n",
      "mean_state_value 0.01834912377631661\n",
      "episode 403/600\n",
      "p1 0.9232000000000002 p0 0.01919999999999993\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.110e-02 -3.830e-02 -3.720e-02 -9.800e-03 -1.730e-02]\n",
      " [-9.700e-03 -1.150e-02 -1.670e-02 -1.350e-02 -5.300e-03]\n",
      " [-2.100e-03 -8.600e-03  1.870e-02 -6.300e-03 -1.800e-03]\n",
      " [-2.100e-03 -1.500e-03  2.602e-01  4.600e-02 -6.900e-03]\n",
      " [-2.000e-04  1.100e-03  2.931e-01  3.940e-02  4.400e-03]]\n",
      "mean_state_value 0.018518685403837352\n",
      "episode 404/600\n",
      "p1 0.9240000000000003 p0 0.018999999999999927\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.100e-02 -3.790e-02 -3.680e-02 -9.700e-03 -1.710e-02]\n",
      " [-9.600e-03 -1.140e-02 -1.650e-02 -1.340e-02 -5.200e-03]\n",
      " [-2.100e-03 -8.500e-03  1.880e-02 -6.200e-03 -1.800e-03]\n",
      " [-2.100e-03 -1.500e-03  2.615e-01  4.600e-02 -6.800e-03]\n",
      " [-2.000e-04  1.200e-03  2.943e-01  3.980e-02  4.400e-03]]\n",
      "mean_state_value 0.018724970968537733\n",
      "episode 405/600\n",
      "p1 0.9248000000000003 p0 0.018799999999999928\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.090e-02 -3.750e-02 -3.640e-02 -9.600e-03 -1.700e-02]\n",
      " [-9.500e-03 -1.130e-02 -1.630e-02 -1.320e-02 -5.200e-03]\n",
      " [-2.100e-03 -8.400e-03  1.890e-02 -6.100e-03 -1.800e-03]\n",
      " [-2.100e-03 -1.500e-03  2.628e-01  4.610e-02 -6.700e-03]\n",
      " [-2.000e-04  1.200e-03  2.955e-01  4.010e-02  4.500e-03]]\n",
      "mean_state_value 0.01893143867538469\n",
      "episode 406/600\n",
      "p1 0.9256000000000003 p0 0.01859999999999993\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.080e-02 -3.710e-02 -3.600e-02 -9.500e-03 -1.680e-02]\n",
      " [-9.400e-03 -1.120e-02 -1.610e-02 -1.310e-02 -5.100e-03]\n",
      " [-2.100e-03 -8.300e-03  1.900e-02 -6.100e-03 -1.700e-03]\n",
      " [-2.100e-03 -1.500e-03  2.641e-01  4.610e-02 -6.600e-03]\n",
      " [-2.000e-04  1.200e-03  2.968e-01  4.050e-02  4.600e-03]]\n",
      "mean_state_value 0.01913742614164279\n",
      "episode 407/600\n",
      "p1 0.9264000000000003 p0 0.018399999999999927\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.070e-02 -3.670e-02 -3.560e-02 -9.400e-03 -1.660e-02]\n",
      " [-9.300e-03 -1.110e-02 -1.600e-02 -1.300e-02 -5.100e-03]\n",
      " [-2.000e-03 -8.200e-03  1.900e-02 -6.000e-03 -1.700e-03]\n",
      " [-2.000e-03 -1.500e-03  2.654e-01  4.620e-02 -6.600e-03]\n",
      " [-2.000e-04  1.200e-03  2.980e-01  4.090e-02  4.600e-03]]\n",
      "mean_state_value 0.019344265624312938\n",
      "episode 408/600\n",
      "p1 0.9272000000000002 p0 0.018199999999999928\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.050e-02 -3.630e-02 -3.520e-02 -9.300e-03 -1.640e-02]\n",
      " [-9.200e-03 -1.090e-02 -1.580e-02 -1.280e-02 -5.000e-03]\n",
      " [-2.000e-03 -8.100e-03  1.910e-02 -6.000e-03 -1.700e-03]\n",
      " [-2.000e-03 -1.500e-03  2.667e-01  4.620e-02 -6.500e-03]\n",
      " [-2.000e-04  1.200e-03  2.992e-01  4.120e-02  4.700e-03]]\n",
      "mean_state_value 0.019551287433540142\n",
      "episode 409/600\n",
      "p1 0.9280000000000003 p0 0.017999999999999926\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.040e-02 -3.590e-02 -3.480e-02 -9.200e-03 -1.620e-02]\n",
      " [-9.100e-03 -1.080e-02 -1.560e-02 -1.270e-02 -5.000e-03]\n",
      " [-2.000e-03 -8.000e-03  1.920e-02 -5.900e-03 -1.700e-03]\n",
      " [-2.000e-03 -1.400e-03  2.680e-01  4.630e-02 -6.400e-03]\n",
      " [-2.000e-04  1.200e-03  3.004e-01  4.160e-02  4.700e-03]]\n",
      "mean_state_value 0.019758491752846438\n",
      "episode 410/600\n",
      "p1 0.9288000000000003 p0 0.017799999999999927\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.030e-02 -3.550e-02 -3.450e-02 -9.100e-03 -1.610e-02]\n",
      " [-9.000e-03 -1.070e-02 -1.540e-02 -1.250e-02 -4.900e-03]\n",
      " [-2.000e-03 -7.900e-03  1.930e-02 -5.800e-03 -1.700e-03]\n",
      " [-2.000e-03 -1.400e-03  2.693e-01  4.630e-02 -6.300e-03]\n",
      " [-2.000e-04  1.200e-03  3.017e-01  4.200e-02  4.800e-03]]\n",
      "mean_state_value 0.019965188505520405\n",
      "episode 411/600\n",
      "p1 0.9296000000000003 p0 0.017599999999999928\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.020e-02 -3.510e-02 -3.410e-02 -9.000e-03 -1.590e-02]\n",
      " [-8.900e-03 -1.060e-02 -1.530e-02 -1.240e-02 -4.800e-03]\n",
      " [-1.900e-03 -7.800e-03  1.940e-02 -5.800e-03 -1.600e-03]\n",
      " [-1.900e-03 -1.400e-03  2.706e-01  4.640e-02 -6.200e-03]\n",
      " [-2.000e-04  1.200e-03  3.029e-01  4.230e-02  4.800e-03]]\n",
      "mean_state_value 0.02017276587544838\n",
      "episode 412/600\n",
      "p1 0.9304000000000003 p0 0.017399999999999926\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.010e-02 -3.470e-02 -3.370e-02 -8.900e-03 -1.570e-02]\n",
      " [-8.800e-03 -1.050e-02 -1.510e-02 -1.230e-02 -4.800e-03]\n",
      " [-1.900e-03 -7.800e-03  1.950e-02 -5.700e-03 -1.600e-03]\n",
      " [-1.900e-03 -1.400e-03  2.719e-01  4.650e-02 -6.200e-03]\n",
      " [-2.000e-04  1.200e-03  3.041e-01  4.270e-02  4.900e-03]]\n",
      "mean_state_value 0.02038052603121024\n",
      "episode 413/600\n",
      "p1 0.9312000000000002 p0 0.017199999999999927\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.000e-02 -3.430e-02 -3.330e-02 -8.800e-03 -1.550e-02]\n",
      " [-8.700e-03 -1.030e-02 -1.490e-02 -1.210e-02 -4.700e-03]\n",
      " [-1.900e-03 -7.700e-03  1.950e-02 -5.600e-03 -1.600e-03]\n",
      " [-1.900e-03 -1.400e-03  2.732e-01  4.650e-02 -6.100e-03]\n",
      " [-2.000e-04  1.200e-03  3.053e-01  4.310e-02  4.900e-03]]\n",
      "mean_state_value 0.02058785706593325\n",
      "episode 414/600\n",
      "p1 0.9320000000000003 p0 0.016999999999999925\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.900e-03 -3.390e-02 -3.290e-02 -8.700e-03 -1.530e-02]\n",
      " [-8.600e-03 -1.020e-02 -1.470e-02 -1.200e-02 -4.700e-03]\n",
      " [-1.900e-03 -7.600e-03  1.960e-02 -5.600e-03 -1.600e-03]\n",
      " [-1.900e-03 -1.400e-03  2.745e-01  4.660e-02 -6.000e-03]\n",
      " [-2.000e-04  1.200e-03  3.066e-01  4.340e-02  5.000e-03]]\n",
      "mean_state_value 0.02079599018536523\n",
      "episode 415/600\n",
      "p1 0.9328000000000003 p0 0.016799999999999926\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.700e-03 -3.350e-02 -3.250e-02 -8.600e-03 -1.520e-02]\n",
      " [-8.500e-03 -1.010e-02 -1.460e-02 -1.180e-02 -4.600e-03]\n",
      " [-1.900e-03 -7.500e-03  1.970e-02 -5.500e-03 -1.600e-03]\n",
      " [-1.900e-03 -1.300e-03  2.747e-01  4.660e-02 -5.900e-03]\n",
      " [-2.000e-04  1.200e-03  3.078e-01  4.380e-02  5.000e-03]]\n",
      "mean_state_value 0.020959864440934815\n",
      "episode 416/600\n",
      "p1 0.9336000000000003 p0 0.016599999999999927\n",
      "trajectorySteps 16\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.60e-03 -3.31e-02 -3.21e-02 -8.50e-03 -1.50e-02]\n",
      " [-8.40e-03 -1.00e-02 -1.44e-02 -1.17e-02 -4.60e-03]\n",
      " [-1.90e-03 -7.40e-03  1.98e-02 -5.40e-03 -1.50e-03]\n",
      " [-1.80e-03 -1.30e-03  2.76e-01  4.67e-02 -5.80e-03]\n",
      " [-2.00e-04  1.30e-03  3.09e-01  4.42e-02  5.10e-03]]\n",
      "mean_state_value 0.02116779106462466\n",
      "episode 417/600\n",
      "p1 0.9344000000000003 p0 0.016399999999999925\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.500e-03 -3.270e-02 -3.170e-02 -8.400e-03 -1.480e-02]\n",
      " [-8.300e-03 -9.900e-03 -1.420e-02 -1.160e-02 -4.500e-03]\n",
      " [-1.900e-03 -7.300e-03  1.990e-02 -5.400e-03 -1.500e-03]\n",
      " [-1.800e-03 -1.300e-03  2.773e-01  4.670e-02 -5.800e-03]\n",
      " [-2.000e-04  1.300e-03  3.103e-01  4.460e-02  5.200e-03]]\n",
      "mean_state_value 0.021375915673083584\n",
      "episode 418/600\n",
      "p1 0.9352000000000003 p0 0.016199999999999926\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.400e-03 -3.230e-02 -3.140e-02 -8.300e-03 -1.460e-02]\n",
      " [-8.200e-03 -9.700e-03 -1.410e-02 -1.140e-02 -4.500e-03]\n",
      " [-1.800e-03 -7.200e-03  2.000e-02 -5.300e-03 -1.500e-03]\n",
      " [-1.800e-03 -1.300e-03  2.786e-01  4.680e-02 -5.700e-03]\n",
      " [-2.000e-04  1.300e-03  3.115e-01  4.490e-02  5.200e-03]]\n",
      "mean_state_value 0.0215845587533684\n",
      "episode 419/600\n",
      "p1 0.9360000000000003 p0 0.015999999999999924\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.300e-03 -3.190e-02 -3.100e-02 -8.200e-03 -1.440e-02]\n",
      " [-8.100e-03 -9.600e-03 -1.390e-02 -1.130e-02 -4.400e-03]\n",
      " [-1.800e-03 -7.100e-03  2.000e-02 -5.200e-03 -1.500e-03]\n",
      " [-1.800e-03 -1.300e-03  2.800e-01  4.680e-02 -5.600e-03]\n",
      " [-2.000e-04  1.300e-03  3.127e-01  4.530e-02  5.300e-03]]\n",
      "mean_state_value 0.02179364261302919\n",
      "episode 420/600\n",
      "p1 0.9368000000000003 p0 0.015799999999999925\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.200e-03 -3.150e-02 -3.060e-02 -8.100e-03 -1.430e-02]\n",
      " [-8.000e-03 -9.500e-03 -1.370e-02 -1.110e-02 -4.400e-03]\n",
      " [-1.800e-03 -7.000e-03  2.010e-02 -5.200e-03 -1.500e-03]\n",
      " [-1.700e-03 -1.300e-03  2.813e-01  4.690e-02 -5.500e-03]\n",
      " [-2.000e-04  1.300e-03  3.140e-01  4.570e-02  5.300e-03]]\n",
      "mean_state_value 0.02200257107508268\n",
      "episode 421/600\n",
      "p1 0.9376000000000003 p0 0.015599999999999925\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-9.100e-03 -3.110e-02 -3.020e-02 -8.000e-03 -1.410e-02]\n",
      " [-7.900e-03 -9.400e-03 -1.350e-02 -1.100e-02 -4.300e-03]\n",
      " [-1.800e-03 -7.000e-03  2.020e-02 -5.100e-03 -1.400e-03]\n",
      " [-1.700e-03 -1.200e-03  2.826e-01  4.690e-02 -5.400e-03]\n",
      " [-2.000e-04  1.300e-03  3.152e-01  4.610e-02  5.400e-03]]\n",
      "mean_state_value 0.022211472829285008\n",
      "episode 422/600\n",
      "p1 0.9384000000000003 p0 0.015399999999999924\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.900e-03 -3.070e-02 -2.980e-02 -7.900e-03 -1.390e-02]\n",
      " [-7.800e-03 -9.300e-03 -1.340e-02 -1.080e-02 -4.200e-03]\n",
      " [-1.800e-03 -6.900e-03  2.030e-02 -5.000e-03 -1.400e-03]\n",
      " [-1.700e-03 -1.200e-03  2.839e-01  4.700e-02 -5.300e-03]\n",
      " [-2.000e-04  1.300e-03  3.165e-01  4.650e-02  5.500e-03]]\n",
      "mean_state_value 0.0224211187265601\n",
      "episode 423/600\n",
      "p1 0.9392000000000003 p0 0.015199999999999925\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.800e-03 -3.030e-02 -2.940e-02 -7.800e-03 -1.370e-02]\n",
      " [-7.700e-03 -9.100e-03 -1.320e-02 -1.070e-02 -4.200e-03]\n",
      " [-1.700e-03 -6.800e-03  2.040e-02 -5.000e-03 -1.400e-03]\n",
      " [-1.700e-03 -1.200e-03  2.852e-01  4.700e-02 -5.300e-03]\n",
      " [-2.000e-04  1.300e-03  3.177e-01  4.680e-02  5.500e-03]]\n",
      "mean_state_value 0.02263094835873564\n",
      "episode 424/600\n",
      "p1 0.9400000000000003 p0 0.014999999999999925\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.700e-03 -2.990e-02 -2.900e-02 -7.700e-03 -1.350e-02]\n",
      " [-7.600e-03 -9.000e-03 -1.300e-02 -1.060e-02 -4.100e-03]\n",
      " [-1.700e-03 -6.700e-03  2.040e-02 -4.900e-03 -1.400e-03]\n",
      " [-1.700e-03 -1.200e-03  2.866e-01  4.710e-02 -5.200e-03]\n",
      " [-2.000e-04  1.300e-03  3.190e-01  4.720e-02  5.600e-03]]\n",
      "mean_state_value 0.02284065624237321\n",
      "episode 425/600\n",
      "p1 0.9408000000000003 p0 0.014799999999999924\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.600e-03 -2.950e-02 -2.860e-02 -7.600e-03 -1.340e-02]\n",
      " [-7.500e-03 -8.900e-03 -1.280e-02 -1.040e-02 -4.100e-03]\n",
      " [-1.700e-03 -6.600e-03  2.050e-02 -4.800e-03 -1.400e-03]\n",
      " [-1.600e-03 -1.200e-03  2.879e-01  4.710e-02 -5.100e-03]\n",
      " [-2.000e-04  1.300e-03  3.202e-01  4.760e-02  5.600e-03]]\n",
      "mean_state_value 0.023050336916771896\n",
      "episode 426/600\n",
      "p1 0.9416000000000003 p0 0.014599999999999924\n",
      "trajectorySteps 15\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.500e-03 -2.910e-02 -2.840e-02 -7.500e-03 -1.320e-02]\n",
      " [-7.400e-03 -8.800e-03 -1.270e-02 -1.030e-02 -4.000e-03]\n",
      " [-1.700e-03 -6.500e-03  2.060e-02 -4.800e-03 -1.300e-03]\n",
      " [-1.600e-03 -1.200e-03  2.883e-01  4.720e-02 -5.000e-03]\n",
      " [-2.000e-04  1.300e-03  3.214e-01  4.800e-02  5.700e-03]]\n",
      "mean_state_value 0.023217835618123576\n",
      "episode 427/600\n",
      "p1 0.9424000000000003 p0 0.014399999999999923\n",
      "trajectorySteps 16\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.400e-03 -2.870e-02 -2.800e-02 -7.400e-03 -1.300e-02]\n",
      " [-7.300e-03 -8.700e-03 -1.250e-02 -1.010e-02 -4.000e-03]\n",
      " [-1.700e-03 -6.400e-03  2.070e-02 -4.700e-03 -1.300e-03]\n",
      " [-1.600e-03 -1.200e-03  2.896e-01  4.720e-02 -4.900e-03]\n",
      " [-2.000e-04  1.300e-03  3.227e-01  4.840e-02  5.800e-03]]\n",
      "mean_state_value 0.02342841947928322\n",
      "episode 428/600\n",
      "p1 0.9432000000000003 p0 0.014199999999999924\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.200e-03 -2.830e-02 -2.760e-02 -7.300e-03 -1.280e-02]\n",
      " [-7.200e-03 -8.500e-03 -1.230e-02 -1.000e-02 -3.900e-03]\n",
      " [-1.700e-03 -6.300e-03  2.080e-02 -4.600e-03 -1.300e-03]\n",
      " [-1.600e-03 -1.100e-03  2.909e-01  4.730e-02 -4.800e-03]\n",
      " [-2.000e-04  1.400e-03  3.239e-01  4.870e-02  5.800e-03]]\n",
      "mean_state_value 0.023639187472907087\n",
      "episode 429/600\n",
      "p1 0.9440000000000003 p0 0.013999999999999924\n",
      "trajectorySteps 16\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.100e-03 -2.790e-02 -2.720e-02 -7.200e-03 -1.260e-02]\n",
      " [-7.100e-03 -8.400e-03 -1.210e-02 -9.900e-03 -3.900e-03]\n",
      " [-1.600e-03 -6.200e-03  2.090e-02 -4.600e-03 -1.300e-03]\n",
      " [-1.500e-03 -1.100e-03  2.922e-01  4.730e-02 -4.800e-03]\n",
      " [-2.000e-04  1.400e-03  3.252e-01  4.910e-02  5.900e-03]]\n",
      "mean_state_value 0.02384964502886311\n",
      "episode 430/600\n",
      "p1 0.9448000000000003 p0 0.013799999999999923\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-8.000e-03 -2.750e-02 -2.680e-02 -7.100e-03 -1.250e-02]\n",
      " [-7.000e-03 -8.300e-03 -1.200e-02 -9.700e-03 -3.800e-03]\n",
      " [-1.600e-03 -6.200e-03  2.090e-02 -4.500e-03 -1.300e-03]\n",
      " [-1.500e-03 -1.100e-03  2.936e-01  4.740e-02 -4.700e-03]\n",
      " [-2.000e-04  1.400e-03  3.265e-01  4.950e-02  6.000e-03]]\n",
      "mean_state_value 0.024060788629655973\n",
      "episode 431/600\n",
      "p1 0.9456000000000003 p0 0.013599999999999923\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.900e-03 -2.710e-02 -2.640e-02 -7.000e-03 -1.230e-02]\n",
      " [-6.900e-03 -8.200e-03 -1.180e-02 -9.600e-03 -3.800e-03]\n",
      " [-1.600e-03 -6.100e-03  2.100e-02 -4.400e-03 -1.200e-03]\n",
      " [-1.500e-03 -1.100e-03  2.940e-01  4.750e-02 -4.600e-03]\n",
      " [-2.000e-04  1.400e-03  3.277e-01  4.990e-02  6.000e-03]]\n",
      "mean_state_value 0.024234082763992717\n",
      "episode 432/600\n",
      "p1 0.9464000000000004 p0 0.013399999999999922\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.800e-03 -2.670e-02 -2.610e-02 -6.900e-03 -1.210e-02]\n",
      " [-6.800e-03 -8.100e-03 -1.160e-02 -9.400e-03 -3.700e-03]\n",
      " [-1.600e-03 -6.000e-03  2.110e-02 -4.400e-03 -1.200e-03]\n",
      " [-1.500e-03 -1.100e-03  2.953e-01  4.750e-02 -4.500e-03]\n",
      " [-2.000e-04  1.400e-03  3.290e-01  5.030e-02  6.100e-03]]\n",
      "mean_state_value 0.024445058131365646\n",
      "episode 433/600\n",
      "p1 0.9472000000000003 p0 0.013199999999999924\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.700e-03 -2.630e-02 -2.570e-02 -6.800e-03 -1.190e-02]\n",
      " [-6.700e-03 -7.900e-03 -1.150e-02 -9.300e-03 -3.600e-03]\n",
      " [-1.600e-03 -5.900e-03  2.120e-02 -4.300e-03 -1.200e-03]\n",
      " [-1.500e-03 -1.100e-03  2.966e-01  4.760e-02 -4.400e-03]\n",
      " [-2.000e-04  1.400e-03  3.302e-01  5.070e-02  6.100e-03]]\n",
      "mean_state_value 0.02465672164836988\n",
      "episode 434/600\n",
      "p1 0.9480000000000003 p0 0.012999999999999923\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.500e-03 -2.590e-02 -2.530e-02 -6.600e-03 -1.170e-02]\n",
      " [-6.600e-03 -7.800e-03 -1.130e-02 -9.200e-03 -3.600e-03]\n",
      " [-1.600e-03 -5.800e-03  2.130e-02 -4.300e-03 -1.200e-03]\n",
      " [-1.400e-03 -1.000e-03  2.980e-01  4.760e-02 -4.300e-03]\n",
      " [-2.000e-04  1.400e-03  3.315e-01  5.110e-02  6.200e-03]]\n",
      "mean_state_value 0.024868087803050373\n",
      "episode 435/600\n",
      "p1 0.9488000000000003 p0 0.012799999999999923\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.400e-03 -2.550e-02 -2.490e-02 -6.500e-03 -1.160e-02]\n",
      " [-6.500e-03 -7.700e-03 -1.110e-02 -9.000e-03 -3.500e-03]\n",
      " [-1.500e-03 -5.700e-03  2.140e-02 -4.200e-03 -1.200e-03]\n",
      " [-1.400e-03 -1.000e-03  2.993e-01  4.770e-02 -4.300e-03]\n",
      " [-2.000e-04  1.400e-03  3.327e-01  5.150e-02  6.300e-03]]\n",
      "mean_state_value 0.025080103669288128\n",
      "episode 436/600\n",
      "p1 0.9496000000000003 p0 0.012599999999999922\n",
      "trajectorySteps 19\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.300e-03 -2.510e-02 -2.450e-02 -6.500e-03 -1.140e-02]\n",
      " [-6.400e-03 -7.600e-03 -1.090e-02 -8.900e-03 -3.500e-03]\n",
      " [-1.500e-03 -5.600e-03  2.140e-02 -4.100e-03 -1.100e-03]\n",
      " [-1.400e-03 -1.000e-03  3.007e-01  4.770e-02 -4.200e-03]\n",
      " [-2.000e-04  1.400e-03  3.340e-01  5.190e-02  6.300e-03]]\n",
      "mean_state_value 0.025292058258153206\n",
      "episode 437/600\n",
      "p1 0.9504000000000004 p0 0.012399999999999922\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.200e-03 -2.470e-02 -2.410e-02 -6.300e-03 -1.120e-02]\n",
      " [-6.300e-03 -7.400e-03 -1.080e-02 -8.700e-03 -3.400e-03]\n",
      " [-1.500e-03 -5.500e-03  2.150e-02 -4.100e-03 -1.100e-03]\n",
      " [-1.400e-03 -1.000e-03  3.020e-01  4.780e-02 -4.100e-03]\n",
      " [-2.000e-04  1.400e-03  3.353e-01  5.230e-02  6.400e-03]]\n",
      "mean_state_value 0.025503996454203922\n",
      "episode 438/600\n",
      "p1 0.9512000000000003 p0 0.012199999999999923\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.100e-03 -2.430e-02 -2.370e-02 -6.200e-03 -1.100e-02]\n",
      " [-6.200e-03 -7.300e-03 -1.060e-02 -8.600e-03 -3.400e-03]\n",
      " [-1.500e-03 -5.400e-03  2.160e-02 -4.000e-03 -1.100e-03]\n",
      " [-1.300e-03 -1.000e-03  3.033e-01  4.780e-02 -4.000e-03]\n",
      " [-2.000e-04  1.400e-03  3.365e-01  5.270e-02  6.500e-03]]\n",
      "mean_state_value 0.025716829972713434\n",
      "episode 439/600\n",
      "p1 0.9520000000000003 p0 0.011999999999999922\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-7.000e-03 -2.390e-02 -2.330e-02 -6.100e-03 -1.080e-02]\n",
      " [-6.100e-03 -7.200e-03 -1.040e-02 -8.500e-03 -3.300e-03]\n",
      " [-1.400e-03 -5.400e-03  2.170e-02 -3.900e-03 -1.100e-03]\n",
      " [-1.300e-03 -1.000e-03  3.047e-01  4.790e-02 -3.900e-03]\n",
      " [-2.000e-04  1.400e-03  3.378e-01  5.310e-02  6.500e-03]]\n",
      "mean_state_value 0.025929597011202024\n",
      "episode 440/600\n",
      "p1 0.9528000000000003 p0 0.011799999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.800e-03 -2.350e-02 -2.290e-02 -6.000e-03 -1.060e-02]\n",
      " [-6.000e-03 -7.100e-03 -1.020e-02 -8.300e-03 -3.300e-03]\n",
      " [-1.400e-03 -5.300e-03  2.180e-02 -3.900e-03 -1.100e-03]\n",
      " [-1.300e-03 -9.000e-04  3.060e-01  4.790e-02 -3.800e-03]\n",
      " [-2.000e-04  1.400e-03  3.391e-01  5.350e-02  6.600e-03]]\n",
      "mean_state_value 0.026142549214208582\n",
      "episode 441/600\n",
      "p1 0.9536000000000003 p0 0.011599999999999921\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.700e-03 -2.320e-02 -2.260e-02 -5.900e-03 -1.050e-02]\n",
      " [-5.900e-03 -7.000e-03 -1.010e-02 -8.200e-03 -3.200e-03]\n",
      " [-1.400e-03 -5.200e-03  2.190e-02 -3.800e-03 -1.000e-03]\n",
      " [-1.300e-03 -9.000e-04  3.074e-01  4.800e-02 -3.700e-03]\n",
      " [-2.000e-04  1.500e-03  3.403e-01  5.380e-02  6.700e-03]]\n",
      "mean_state_value 0.02635485725484269\n",
      "episode 442/600\n",
      "p1 0.9544000000000004 p0 0.01139999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.600e-03 -2.280e-02 -2.220e-02 -5.800e-03 -1.030e-02]\n",
      " [-5.800e-03 -6.800e-03 -9.900e-03 -8.000e-03 -3.100e-03]\n",
      " [-1.400e-03 -5.100e-03  2.190e-02 -3.700e-03 -1.000e-03]\n",
      " [-1.300e-03 -9.000e-04  3.087e-01  4.800e-02 -3.700e-03]\n",
      " [-1.000e-04  1.500e-03  3.416e-01  5.420e-02  6.700e-03]]\n",
      "mean_state_value 0.026568194379874524\n",
      "episode 443/600\n",
      "p1 0.9552000000000003 p0 0.011199999999999922\n",
      "trajectorySteps 13\n",
      "[[1 1 2 0 0]\n",
      " [1 0 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 1 1]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.500e-03 -2.240e-02 -2.190e-02 -5.700e-03 -1.010e-02]\n",
      " [-5.700e-03 -6.700e-03 -9.700e-03 -7.900e-03 -3.100e-03]\n",
      " [-1.400e-03 -5.000e-03  2.200e-02 -3.700e-03 -1.000e-03]\n",
      " [-1.200e-03 -9.000e-04  3.101e-01  4.930e-02 -3.700e-03]\n",
      " [-1.000e-04  1.500e-03  3.420e-01  5.440e-02  6.700e-03]]\n",
      "mean_state_value 0.0267715499631078\n",
      "episode 444/600\n",
      "p1 0.9560000000000003 p0 0.010999999999999921\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.400e-03 -2.200e-02 -2.150e-02 -5.600e-03 -9.900e-03]\n",
      " [-5.600e-03 -6.600e-03 -9.500e-03 -7.700e-03 -3.000e-03]\n",
      " [-1.300e-03 -4.900e-03  2.210e-02 -3.600e-03 -1.000e-03]\n",
      " [-1.200e-03 -9.000e-04  3.114e-01  4.940e-02 -3.600e-03]\n",
      " [-1.000e-04  1.500e-03  3.432e-01  5.480e-02  6.800e-03]]\n",
      "mean_state_value 0.026985406355833047\n",
      "episode 445/600\n",
      "p1 0.9568000000000003 p0 0.01079999999999992\n",
      "trajectorySteps 17\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.300e-03 -2.160e-02 -2.110e-02 -5.500e-03 -9.700e-03]\n",
      " [-5.500e-03 -6.500e-03 -9.400e-03 -7.600e-03 -3.000e-03]\n",
      " [-1.300e-03 -4.800e-03  2.220e-02 -3.500e-03 -1.000e-03]\n",
      " [-1.200e-03 -9.000e-04  3.128e-01  4.940e-02 -3.500e-03]\n",
      " [-1.000e-04  1.500e-03  3.445e-01  5.520e-02  6.900e-03]]\n",
      "mean_state_value 0.027199448381628816\n",
      "episode 446/600\n",
      "p1 0.9576000000000003 p0 0.01059999999999992\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.100e-03 -2.120e-02 -2.070e-02 -5.400e-03 -9.600e-03]\n",
      " [-5.400e-03 -6.400e-03 -9.200e-03 -7.500e-03 -2.900e-03]\n",
      " [-1.300e-03 -4.700e-03  2.230e-02 -3.500e-03 -9.000e-04]\n",
      " [-1.200e-03 -8.000e-04  3.132e-01  4.950e-02 -3.400e-03]\n",
      " [-1.000e-04  1.500e-03  3.458e-01  5.560e-02  6.900e-03]]\n",
      "mean_state_value 0.027375613162925187\n",
      "episode 447/600\n",
      "p1 0.9584000000000004 p0 0.01039999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-6.000e-03 -2.080e-02 -2.030e-02 -5.300e-03 -9.400e-03]\n",
      " [-5.300e-03 -6.200e-03 -9.000e-03 -7.300e-03 -2.900e-03]\n",
      " [-1.300e-03 -4.600e-03  2.240e-02 -3.400e-03 -9.000e-04]\n",
      " [-1.100e-03 -8.000e-04  3.145e-01  4.950e-02 -3.300e-03]\n",
      " [-1.000e-04  1.500e-03  3.471e-01  5.600e-02  7.000e-03]]\n",
      "mean_state_value 0.02758958885136842\n",
      "episode 448/600\n",
      "p1 0.9592000000000003 p0 0.010199999999999921\n",
      "trajectorySteps 16\n",
      "[[1 1 2 2 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.900e-03 -2.040e-02 -1.990e-02 -5.200e-03 -9.200e-03]\n",
      " [-5.200e-03 -6.100e-03 -8.800e-03 -7.200e-03 -2.800e-03]\n",
      " [-1.200e-03 -4.500e-03  2.240e-02 -3.300e-03 -9.000e-04]\n",
      " [-1.100e-03 -8.000e-04  3.159e-01  4.960e-02 -3.300e-03]\n",
      " [-1.000e-04  1.500e-03  3.483e-01  5.640e-02  7.100e-03]]\n",
      "mean_state_value 0.02780412304799016\n",
      "episode 449/600\n",
      "p1 0.9600000000000003 p0 0.00999999999999992\n",
      "trajectorySteps 17\n",
      "[[1 1 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.800e-03 -2.000e-02 -1.950e-02 -5.100e-03 -9.000e-03]\n",
      " [-5.100e-03 -6.000e-03 -8.700e-03 -7.000e-03 -2.800e-03]\n",
      " [-1.200e-03 -4.500e-03  2.250e-02 -3.300e-03 -9.000e-04]\n",
      " [-1.100e-03 -8.000e-04  3.173e-01  4.960e-02 -3.200e-03]\n",
      " [-1.000e-04  1.500e-03  3.496e-01  5.680e-02  7.100e-03]]\n",
      "mean_state_value 0.02801863958888778\n",
      "episode 450/600\n",
      "p1 0.9608000000000003 p0 0.00979999999999992\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.700e-03 -1.960e-02 -1.910e-02 -5.000e-03 -8.800e-03]\n",
      " [-5.000e-03 -5.900e-03 -8.500e-03 -6.900e-03 -2.700e-03]\n",
      " [-1.200e-03 -4.400e-03  2.260e-02 -3.200e-03 -9.000e-04]\n",
      " [-1.100e-03 -8.000e-04  3.186e-01  4.970e-02 -3.100e-03]\n",
      " [-1.000e-04  1.500e-03  3.509e-01  5.720e-02  7.200e-03]]\n",
      "mean_state_value 0.02823354982587504\n",
      "episode 451/600\n",
      "p1 0.9616000000000003 p0 0.00959999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.600e-03 -1.920e-02 -1.880e-02 -4.900e-03 -8.700e-03]\n",
      " [-4.900e-03 -5.800e-03 -8.300e-03 -6.800e-03 -2.600e-03]\n",
      " [-1.200e-03 -4.300e-03  2.270e-02 -3.100e-03 -8.000e-04]\n",
      " [-1.100e-03 -8.000e-04  3.200e-01  4.970e-02 -3.000e-03]\n",
      " [-1.000e-04  1.500e-03  3.522e-01  5.760e-02  7.300e-03]]\n",
      "mean_state_value 0.028448646185104568\n",
      "episode 452/600\n",
      "p1 0.9624000000000004 p0 0.009399999999999919\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.500e-03 -1.880e-02 -1.840e-02 -4.800e-03 -8.500e-03]\n",
      " [-4.800e-03 -5.600e-03 -8.200e-03 -6.600e-03 -2.600e-03]\n",
      " [-1.100e-03 -4.200e-03  2.280e-02 -3.100e-03 -8.000e-04]\n",
      " [-1.000e-03 -8.000e-04  3.214e-01  4.980e-02 -2.900e-03]\n",
      " [-1.000e-04  1.500e-03  3.535e-01  5.800e-02  7.300e-03]]\n",
      "mean_state_value 0.028663928758400056\n",
      "episode 453/600\n",
      "p1 0.9632000000000003 p0 0.00919999999999992\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.300e-03 -1.840e-02 -1.800e-02 -4.700e-03 -8.300e-03]\n",
      " [-4.700e-03 -5.500e-03 -8.000e-03 -6.500e-03 -2.500e-03]\n",
      " [-1.100e-03 -4.100e-03  2.280e-02 -3.000e-03 -8.000e-04]\n",
      " [-1.000e-03 -7.000e-04  3.227e-01  4.980e-02 -2.800e-03]\n",
      " [-1.000e-04  1.600e-03  3.548e-01  5.840e-02  7.400e-03]]\n",
      "mean_state_value 0.028879040608994863\n",
      "episode 454/600\n",
      "p1 0.9640000000000003 p0 0.00899999999999992\n",
      "trajectorySteps 16\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.200e-03 -1.800e-02 -1.760e-02 -4.600e-03 -8.100e-03]\n",
      " [-4.600e-03 -5.400e-03 -7.800e-03 -6.300e-03 -2.500e-03]\n",
      " [-1.100e-03 -4.000e-03  2.290e-02 -2.900e-03 -8.000e-04]\n",
      " [-1.000e-03 -7.000e-04  3.241e-01  4.990e-02 -2.700e-03]\n",
      " [-1.000e-04  1.600e-03  3.561e-01  5.890e-02  7.500e-03]]\n",
      "mean_state_value 0.029094703647374508\n",
      "episode 455/600\n",
      "p1 0.9648000000000003 p0 0.008799999999999919\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.100e-03 -1.760e-02 -1.720e-02 -4.500e-03 -7.900e-03]\n",
      " [-4.500e-03 -5.300e-03 -7.600e-03 -6.200e-03 -2.400e-03]\n",
      " [-1.100e-03 -3.900e-03  2.300e-02 -2.900e-03 -8.000e-04]\n",
      " [-1.000e-03 -7.000e-04  3.255e-01  5.000e-02 -2.700e-03]\n",
      " [-1.000e-04  1.600e-03  3.574e-01  5.930e-02  7.500e-03]]\n",
      "mean_state_value 0.029310553175276314\n",
      "episode 456/600\n",
      "p1 0.9656000000000003 p0 0.008599999999999918\n",
      "trajectorySteps 15\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-5.000e-03 -1.720e-02 -1.680e-02 -4.400e-03 -7.800e-03]\n",
      " [-4.400e-03 -5.200e-03 -7.500e-03 -6.100e-03 -2.400e-03]\n",
      " [-1.000e-03 -3.800e-03  2.310e-02 -2.800e-03 -7.000e-04]\n",
      " [-1.000e-03 -7.000e-04  3.268e-01  5.000e-02 -2.600e-03]\n",
      " [-1.000e-04  1.600e-03  3.586e-01  5.970e-02  7.600e-03]]\n",
      "mean_state_value 0.029526408316473653\n",
      "episode 457/600\n",
      "p1 0.9664000000000004 p0 0.008399999999999918\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.900e-03 -1.680e-02 -1.640e-02 -4.300e-03 -7.600e-03]\n",
      " [-4.300e-03 -5.000e-03 -7.300e-03 -5.900e-03 -2.300e-03]\n",
      " [-1.000e-03 -3.700e-03  2.320e-02 -2.700e-03 -7.000e-04]\n",
      " [-9.000e-04 -7.000e-04  3.282e-01  5.010e-02 -2.500e-03]\n",
      " [-1.000e-04  1.600e-03  3.599e-01  6.010e-02  7.700e-03]]\n",
      "mean_state_value 0.029742316199681627\n",
      "episode 458/600\n",
      "p1 0.9672000000000003 p0 0.00819999999999992\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.800e-03 -1.640e-02 -1.600e-02 -4.200e-03 -7.400e-03]\n",
      " [-4.200e-03 -4.900e-03 -7.100e-03 -5.800e-03 -2.300e-03]\n",
      " [-1.000e-03 -3.700e-03  2.330e-02 -2.700e-03 -7.000e-04]\n",
      " [-9.000e-04 -7.000e-04  3.296e-01  5.010e-02 -2.400e-03]\n",
      " [-1.000e-04  1.600e-03  3.612e-01  6.050e-02  7.800e-03]]\n",
      "mean_state_value 0.02995873755338476\n",
      "episode 459/600\n",
      "p1 0.9680000000000003 p0 0.007999999999999919\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.600e-03 -1.600e-02 -1.560e-02 -4.100e-03 -7.200e-03]\n",
      " [-4.100e-03 -4.800e-03 -6.900e-03 -5.600e-03 -2.200e-03]\n",
      " [-1.000e-03 -3.600e-03  2.330e-02 -2.600e-03 -7.000e-04]\n",
      " [-9.000e-04 -6.000e-04  3.309e-01  5.020e-02 -2.300e-03]\n",
      " [-1.000e-04  1.600e-03  3.625e-01  6.090e-02  7.800e-03]]\n",
      "mean_state_value 0.03017534576385012\n",
      "episode 460/600\n",
      "p1 0.9688000000000003 p0 0.007799999999999918\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.500e-03 -1.560e-02 -1.520e-02 -4.000e-03 -7.000e-03]\n",
      " [-4.000e-03 -4.700e-03 -6.800e-03 -5.500e-03 -2.100e-03]\n",
      " [-1.000e-03 -3.500e-03  2.340e-02 -2.600e-03 -7.000e-04]\n",
      " [-9.000e-04 -6.000e-04  3.323e-01  5.020e-02 -2.200e-03]\n",
      " [-1.000e-04  1.600e-03  3.638e-01  6.140e-02  7.900e-03]]\n",
      "mean_state_value 0.030392140922881512\n",
      "episode 461/600\n",
      "p1 0.9696000000000004 p0 0.007599999999999918\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.400e-03 -1.520e-02 -1.480e-02 -3.900e-03 -6.900e-03]\n",
      " [-3.900e-03 -4.600e-03 -6.600e-03 -5.400e-03 -2.100e-03]\n",
      " [-9.000e-04 -3.400e-03  2.350e-02 -2.500e-03 -7.000e-04]\n",
      " [-8.000e-04 -6.000e-04  3.337e-01  5.030e-02 -2.100e-03]\n",
      " [-1.000e-04  1.600e-03  3.651e-01  6.180e-02  8.000e-03]]\n",
      "mean_state_value 0.03060912312228024\n",
      "episode 462/600\n",
      "p1 0.9704000000000004 p0 0.007399999999999918\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.300e-03 -1.480e-02 -1.450e-02 -3.800e-03 -6.700e-03]\n",
      " [-3.800e-03 -4.400e-03 -6.400e-03 -5.200e-03 -2.000e-03]\n",
      " [-9.000e-04 -3.300e-03  2.360e-02 -2.400e-03 -6.000e-04]\n",
      " [-8.000e-04 -6.000e-04  3.351e-01  5.030e-02 -2.100e-03]\n",
      " [-1.000e-04  1.600e-03  3.664e-01  6.220e-02  8.000e-03]]\n",
      "mean_state_value 0.03082629245384512\n",
      "episode 463/600\n",
      "p1 0.9712000000000003 p0 0.007199999999999917\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.200e-03 -1.440e-02 -1.410e-02 -3.700e-03 -6.500e-03]\n",
      " [-3.700e-03 -4.300e-03 -6.200e-03 -5.100e-03 -2.000e-03]\n",
      " [-9.000e-04 -3.200e-03  2.370e-02 -2.400e-03 -6.000e-04]\n",
      " [-8.000e-04 -6.000e-04  3.365e-01  5.040e-02 -2.000e-03]\n",
      " [-1.000e-04  1.600e-03  3.677e-01  6.260e-02  8.100e-03]]\n",
      "mean_state_value 0.031043649009372495\n",
      "episode 464/600\n",
      "p1 0.9720000000000003 p0 0.006999999999999918\n",
      "trajectorySteps 16\n",
      "[[1 1 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-4.100e-03 -1.400e-02 -1.370e-02 -3.600e-03 -6.300e-03]\n",
      " [-3.600e-03 -4.200e-03 -6.100e-03 -4.900e-03 -1.900e-03]\n",
      " [-9.000e-04 -3.100e-03  2.380e-02 -2.300e-03 -6.000e-04]\n",
      " [-8.000e-04 -6.000e-04  3.378e-01  5.040e-02 -1.900e-03]\n",
      " [-1.000e-04  1.600e-03  3.690e-01  6.310e-02  8.200e-03]]\n",
      "mean_state_value 0.03126119288065647\n",
      "episode 465/600\n",
      "p1 0.9728000000000003 p0 0.006799999999999917\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.900e-03 -1.360e-02 -1.330e-02 -3.500e-03 -6.100e-03]\n",
      " [-3.400e-03 -4.100e-03 -5.900e-03 -4.800e-03 -1.900e-03]\n",
      " [-8.000e-04 -3.000e-03  2.380e-02 -2.200e-03 -6.000e-04]\n",
      " [-8.000e-04 -5.000e-04  3.392e-01  5.050e-02 -1.800e-03]\n",
      " [-1.000e-04  1.700e-03  3.704e-01  6.350e-02  8.300e-03]]\n",
      "mean_state_value 0.03147892415948787\n",
      "episode 466/600\n",
      "p1 0.9736000000000004 p0 0.006599999999999917\n",
      "trajectorySteps 17\n",
      "[[1 2 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.800e-03 -1.320e-02 -1.290e-02 -3.400e-03 -6.000e-03]\n",
      " [-3.300e-03 -4.000e-03 -5.700e-03 -4.600e-03 -1.800e-03]\n",
      " [-8.000e-04 -2.900e-03  2.390e-02 -2.200e-03 -6.000e-04]\n",
      " [-7.000e-04 -5.000e-04  3.406e-01  5.050e-02 -1.700e-03]\n",
      " [-1.000e-04  1.700e-03  3.717e-01  6.390e-02  8.300e-03]]\n",
      "mean_state_value 0.031694604683065626\n",
      "episode 467/600\n",
      "p1 0.9744000000000004 p0 0.006399999999999917\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.70e-03 -1.28e-02 -1.25e-02 -3.30e-03 -5.80e-03]\n",
      " [-3.20e-03 -3.80e-03 -5.60e-03 -4.50e-03 -1.80e-03]\n",
      " [-8.00e-04 -2.90e-03  2.40e-02 -2.10e-03 -5.00e-04]\n",
      " [-7.00e-04 -5.00e-04  3.42e-01  5.06e-02 -1.60e-03]\n",
      " [-1.00e-04  1.70e-03  3.73e-01  6.43e-02  8.40e-03]]\n",
      "mean_state_value 0.031912778879715394\n",
      "episode 468/600\n",
      "p1 0.9752000000000003 p0 0.0061999999999999165\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.600e-03 -1.240e-02 -1.210e-02 -3.200e-03 -5.600e-03]\n",
      " [-3.100e-03 -3.700e-03 -5.400e-03 -4.400e-03 -1.700e-03]\n",
      " [-8.000e-04 -2.800e-03  2.410e-02 -2.000e-03 -5.000e-04]\n",
      " [-7.000e-04 -5.000e-04  3.434e-01  5.070e-02 -1.500e-03]\n",
      " [-1.000e-04  1.700e-03  3.743e-01  6.480e-02  8.500e-03]]\n",
      "mean_state_value 0.03213114075776419\n",
      "episode 469/600\n",
      "p1 0.9760000000000003 p0 0.005999999999999917\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.500e-03 -1.200e-02 -1.170e-02 -3.100e-03 -5.400e-03]\n",
      " [-3.000e-03 -3.600e-03 -5.200e-03 -4.200e-03 -1.700e-03]\n",
      " [-7.000e-04 -2.700e-03  2.420e-02 -2.000e-03 -5.000e-04]\n",
      " [-7.000e-04 -5.000e-04  3.448e-01  5.070e-02 -1.400e-03]\n",
      " [-1.000e-04  1.700e-03  3.756e-01  6.520e-02  8.600e-03]]\n",
      "mean_state_value 0.032349330171267454\n",
      "episode 470/600\n",
      "p1 0.9768000000000003 p0 0.005799999999999916\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.400e-03 -1.160e-02 -1.130e-02 -3.000e-03 -5.200e-03]\n",
      " [-2.900e-03 -3.500e-03 -5.000e-03 -4.100e-03 -1.600e-03]\n",
      " [-7.000e-04 -2.600e-03  2.430e-02 -1.900e-03 -5.000e-04]\n",
      " [-6.000e-04 -5.000e-04  3.462e-01  5.080e-02 -1.400e-03]\n",
      " [-1.000e-04  1.700e-03  3.769e-01  6.560e-02  8.600e-03]]\n",
      "mean_state_value 0.032568079698446134\n",
      "episode 471/600\n",
      "p1 0.9776000000000004 p0 0.005599999999999916\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.300e-03 -1.120e-02 -1.090e-02 -2.900e-03 -5.100e-03]\n",
      " [-2.800e-03 -3.400e-03 -4.900e-03 -3.900e-03 -1.500e-03]\n",
      " [-7.000e-04 -2.500e-03  2.430e-02 -1.800e-03 -5.000e-04]\n",
      " [-6.000e-04 -4.000e-04  3.476e-01  5.080e-02 -1.300e-03]\n",
      " [-1.000e-04  1.700e-03  3.782e-01  6.610e-02  8.700e-03]]\n",
      "mean_state_value 0.03278701718386843\n",
      "episode 472/600\n",
      "p1 0.9784000000000004 p0 0.005399999999999916\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.100e-03 -1.080e-02 -1.050e-02 -2.800e-03 -4.900e-03]\n",
      " [-2.700e-03 -3.200e-03 -4.700e-03 -3.800e-03 -1.500e-03]\n",
      " [-7.000e-04 -2.400e-03  2.440e-02 -1.800e-03 -4.000e-04]\n",
      " [-6.000e-04 -4.000e-04  3.490e-01  5.090e-02 -1.200e-03]\n",
      " [-1.000e-04  1.700e-03  3.795e-01  6.650e-02  8.800e-03]]\n",
      "mean_state_value 0.03300595306303148\n",
      "episode 473/600\n",
      "p1 0.9792000000000003 p0 0.005199999999999916\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-3.000e-03 -1.040e-02 -1.020e-02 -2.700e-03 -4.700e-03]\n",
      " [-2.600e-03 -3.100e-03 -4.500e-03 -3.700e-03 -1.400e-03]\n",
      " [-6.000e-04 -2.300e-03  2.450e-02 -1.700e-03 -4.000e-04]\n",
      " [-6.000e-04 -4.000e-04  3.494e-01  5.090e-02 -1.100e-03]\n",
      " [-1.000e-04  1.700e-03  3.809e-01  6.690e-02  8.900e-03]]\n",
      "mean_state_value 0.03318628749605311\n",
      "episode 474/600\n",
      "p1 0.9800000000000003 p0 0.004999999999999916\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.900e-03 -1.000e-02 -9.800e-03 -2.600e-03 -4.500e-03]\n",
      " [-2.500e-03 -3.000e-03 -4.300e-03 -3.500e-03 -1.400e-03]\n",
      " [-6.000e-04 -2.200e-03  2.460e-02 -1.600e-03 -4.000e-04]\n",
      " [-6.000e-04 -4.000e-04  3.508e-01  5.100e-02 -1.000e-03]\n",
      " [-1.000e-04  1.700e-03  3.822e-01  6.740e-02  8.900e-03]]\n",
      "mean_state_value 0.03340572338085924\n",
      "episode 475/600\n",
      "p1 0.9808000000000003 p0 0.0047999999999999154\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.800e-03 -9.600e-03 -9.400e-03 -2.500e-03 -4.300e-03]\n",
      " [-2.400e-03 -2.900e-03 -4.200e-03 -3.400e-03 -1.300e-03]\n",
      " [-6.000e-04 -2.100e-03  2.470e-02 -1.600e-03 -4.000e-04]\n",
      " [-5.000e-04 -4.000e-04  3.522e-01  5.100e-02 -9.000e-04]\n",
      " [-1.000e-04  1.700e-03  3.835e-01  6.780e-02  9.000e-03]]\n",
      "mean_state_value 0.03362534729965901\n",
      "episode 476/600\n",
      "p1 0.9816000000000004 p0 0.004599999999999915\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.700e-03 -9.200e-03 -9.000e-03 -2.400e-03 -4.200e-03]\n",
      " [-2.300e-03 -2.800e-03 -4.000e-03 -3.200e-03 -1.300e-03]\n",
      " [-6.000e-04 -2.100e-03  2.480e-02 -1.500e-03 -4.000e-04]\n",
      " [-5.000e-04 -4.000e-04  3.536e-01  5.110e-02 -8.000e-04]\n",
      " [-1.000e-04  1.700e-03  3.848e-01  6.830e-02  9.100e-03]]\n",
      "mean_state_value 0.033845159573765056\n",
      "episode 477/600\n",
      "p1 0.9824000000000004 p0 0.004399999999999915\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.600e-03 -8.800e-03 -8.600e-03 -2.300e-03 -4.000e-03]\n",
      " [-2.200e-03 -2.600e-03 -3.800e-03 -3.100e-03 -1.200e-03]\n",
      " [-5.000e-04 -2.000e-03  2.480e-02 -1.400e-03 -3.000e-04]\n",
      " [-5.000e-04 -4.000e-04  3.550e-01  5.110e-02 -7.000e-04]\n",
      " [-1.000e-04  1.700e-03  3.862e-01  6.870e-02  9.200e-03]]\n",
      "mean_state_value 0.03406516029494075\n",
      "episode 478/600\n",
      "p1 0.9832000000000003 p0 0.004199999999999915\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.400e-03 -8.400e-03 -8.200e-03 -2.200e-03 -3.800e-03]\n",
      " [-2.100e-03 -2.500e-03 -3.600e-03 -3.000e-03 -1.200e-03]\n",
      " [-5.000e-04 -1.900e-03  2.490e-02 -1.400e-03 -3.000e-04]\n",
      " [-5.000e-04 -3.000e-04  3.564e-01  5.120e-02 -6.000e-04]\n",
      " [-1.000e-04  1.800e-03  3.875e-01  6.910e-02  9.200e-03]]\n",
      "mean_state_value 0.034285349554946984\n",
      "episode 479/600\n",
      "p1 0.9840000000000003 p0 0.003999999999999915\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.300e-03 -8.000e-03 -7.800e-03 -2.000e-03 -3.600e-03]\n",
      " [-2.000e-03 -2.400e-03 -3.500e-03 -2.800e-03 -1.100e-03]\n",
      " [-5.000e-04 -1.800e-03  2.500e-02 -1.300e-03 -3.000e-04]\n",
      " [-4.000e-04 -3.000e-04  3.578e-01  5.120e-02 -6.000e-04]\n",
      " [-1.000e-04  1.800e-03  3.888e-01  6.960e-02  9.300e-03]]\n",
      "mean_state_value 0.03450572744554217\n",
      "episode 480/600\n",
      "p1 0.9848000000000003 p0 0.0037999999999999146\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.200e-03 -7.600e-03 -7.400e-03 -1.900e-03 -3.400e-03]\n",
      " [-1.900e-03 -2.300e-03 -3.300e-03 -2.700e-03 -1.000e-03]\n",
      " [-5.000e-04 -1.700e-03  2.510e-02 -1.200e-03 -3.000e-04]\n",
      " [-4.000e-04 -3.000e-04  3.592e-01  5.130e-02 -5.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.901e-01  7.000e-02  9.400e-03]]\n",
      "mean_state_value 0.03472616073011965\n",
      "episode 481/600\n",
      "p1 0.9856000000000004 p0 0.0035999999999999145\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.100e-03 -7.200e-03 -7.000e-03 -1.800e-03 -3.200e-03]\n",
      " [-1.800e-03 -2.200e-03 -3.100e-03 -2.500e-03 -1.000e-03]\n",
      " [-4.000e-04 -1.600e-03  2.520e-02 -1.200e-03 -3.000e-04]\n",
      " [-4.000e-04 -3.000e-04  3.606e-01  5.140e-02 -4.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.915e-01  7.050e-02  9.500e-03]]\n",
      "mean_state_value 0.03494692317444027\n",
      "episode 482/600\n",
      "p1 0.9864000000000004 p0 0.0033999999999999144\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.000e-03 -6.800e-03 -6.600e-03 -1.700e-03 -3.100e-03]\n",
      " [-1.700e-03 -2.000e-03 -2.900e-03 -2.400e-03 -9.000e-04]\n",
      " [-4.000e-04 -1.500e-03  2.520e-02 -1.100e-03 -2.000e-04]\n",
      " [-4.000e-04 -3.000e-04  3.620e-01  5.140e-02 -3.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.928e-01  7.090e-02  9.600e-03]]\n",
      "mean_state_value 0.035167755350105835\n",
      "episode 483/600\n",
      "p1 0.9872000000000003 p0 0.003199999999999914\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.900e-03 -6.400e-03 -6.300e-03 -1.600e-03 -2.900e-03]\n",
      " [-1.600e-03 -1.900e-03 -2.800e-03 -2.300e-03 -9.000e-04]\n",
      " [-4.000e-04 -1.400e-03  2.530e-02 -1.000e-03 -2.000e-04]\n",
      " [-4.000e-04 -3.000e-04  3.634e-01  5.150e-02 -2.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.941e-01  7.140e-02  9.600e-03]]\n",
      "mean_state_value 0.03538890270813839\n",
      "episode 484/600\n",
      "p1 0.9880000000000003 p0 0.0029999999999999138\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.700e-03 -6.000e-03 -5.900e-03 -1.500e-03 -2.700e-03]\n",
      " [-1.500e-03 -1.800e-03 -2.600e-03 -2.100e-03 -8.000e-04]\n",
      " [-4.000e-04 -1.300e-03  2.540e-02 -1.000e-03 -2.000e-04]\n",
      " [-3.000e-04 -2.000e-04  3.649e-01  5.150e-02 -1.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.955e-01  7.180e-02  9.700e-03]]\n",
      "mean_state_value 0.03561023915551464\n",
      "episode 485/600\n",
      "p1 0.9888000000000003 p0 0.0027999999999999137\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.600e-03 -5.600e-03 -5.500e-03 -1.400e-03 -2.500e-03]\n",
      " [-1.400e-03 -1.700e-03 -2.400e-03 -2.000e-03 -8.000e-04]\n",
      " [-4.000e-04 -1.200e-03  2.550e-02 -9.000e-04 -2.000e-04]\n",
      " [-3.000e-04 -2.000e-04  3.663e-01  5.160e-02 -0.000e+00]\n",
      " [-0.000e+00  1.800e-03  3.968e-01  7.230e-02  9.800e-03]]\n",
      "mean_state_value 0.03583166673841234\n",
      "episode 486/600\n",
      "p1 0.9896000000000004 p0 0.0025999999999999136\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-03 -5.200e-03 -5.100e-03 -1.300e-03 -2.300e-03]\n",
      " [-1.300e-03 -1.600e-03 -2.300e-03 -1.800e-03 -7.000e-04]\n",
      " [-3.000e-04 -1.200e-03  2.560e-02 -9.000e-04 -2.000e-04]\n",
      " [-3.000e-04 -2.000e-04  3.677e-01  5.160e-02  1.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.981e-01  7.270e-02  9.900e-03]]\n",
      "mean_state_value 0.036053388642958636\n",
      "episode 487/600\n",
      "p1 0.9904000000000004 p0 0.0023999999999999135\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.400e-03 -4.800e-03 -4.700e-03 -1.200e-03 -2.200e-03]\n",
      " [-1.200e-03 -1.400e-03 -2.100e-03 -1.700e-03 -7.000e-04]\n",
      " [-3.000e-04 -1.100e-03  2.570e-02 -8.000e-04 -1.000e-04]\n",
      " [-3.000e-04 -2.000e-04  3.691e-01  5.170e-02  1.000e-04]\n",
      " [-0.000e+00  1.800e-03  3.995e-01  7.320e-02  1.000e-02]]\n",
      "mean_state_value 0.03627520805956682\n",
      "episode 488/600\n",
      "p1 0.9912000000000003 p0 0.002199999999999913\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.300e-03 -4.400e-03 -4.300e-03 -1.100e-03 -2.000e-03]\n",
      " [-1.100e-03 -1.300e-03 -1.900e-03 -1.500e-03 -6.000e-04]\n",
      " [-3.000e-04 -1.000e-03  2.570e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.705e-01  5.170e-02  2.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.008e-01  7.360e-02  1.000e-02]]\n",
      "mean_state_value 0.03649731643935792\n",
      "episode 489/600\n",
      "p1 0.9920000000000003 p0 0.001999999999999913\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.719e-01  5.180e-02  3.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.022e-01  7.410e-02  1.010e-02]]\n",
      "mean_state_value 0.03671961436718528\n",
      "episode 490/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.729e-01  5.180e-02  3.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.031e-01  7.440e-02  1.020e-02]]\n",
      "mean_state_value 0.03681343441173983\n",
      "episode 491/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.739e-01  5.180e-02  3.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.041e-01  7.470e-02  1.030e-02]]\n",
      "mean_state_value 0.03690729276058926\n",
      "episode 492/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.749e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.051e-01  7.500e-02  1.030e-02]]\n",
      "mean_state_value 0.037001259235548443\n",
      "episode 493/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.759e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.060e-01  7.540e-02  1.040e-02]]\n",
      "mean_state_value 0.037095263943291025\n",
      "episode 494/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.769e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.070e-01  7.570e-02  1.040e-02]]\n",
      "mean_state_value 0.03718930688304258\n",
      "episode 495/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.779e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.079e-01  7.600e-02  1.050e-02]]\n",
      "mean_state_value 0.03728331823136935\n",
      "episode 496/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.789e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.089e-01  7.640e-02  1.060e-02]]\n",
      "mean_state_value 0.03737743763281556\n",
      "episode 497/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.799e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.099e-01  7.670e-02  1.060e-02]]\n",
      "mean_state_value 0.037471595263947506\n",
      "episode 498/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.808e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.108e-01  7.700e-02  1.070e-02]]\n",
      "mean_state_value 0.03756572137115408\n",
      "episode 499/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.818e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.118e-01  7.730e-02  1.080e-02]]\n",
      "mean_state_value 0.03765996167775239\n",
      "episode 500/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.828e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.127e-01  7.770e-02  1.080e-02]]\n",
      "mean_state_value 0.03775423399329516\n",
      "episode 501/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.838e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.137e-01  7.800e-02  1.090e-02]]\n",
      "mean_state_value 0.03784854453542614\n",
      "episode 502/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.848e-01  5.180e-02  4.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.147e-01  7.830e-02  1.100e-02]]\n",
      "mean_state_value 0.03794289330337098\n",
      "episode 503/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.858e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.156e-01  7.870e-02  1.100e-02]]\n",
      "mean_state_value 0.03803728029635536\n",
      "episode 504/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.868e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.166e-01  7.900e-02  1.110e-02]]\n",
      "mean_state_value 0.03813163583052109\n",
      "episode 505/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.878e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.175e-01  7.930e-02  1.120e-02]]\n",
      "mean_state_value 0.038226099271261554\n",
      "episode 506/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.887e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.185e-01  7.970e-02  1.130e-02]]\n",
      "mean_state_value 0.03832060093471862\n",
      "episode 507/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.897e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.195e-01  8.000e-02  1.130e-02]]\n",
      "mean_state_value 0.03841514082011798\n",
      "episode 508/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.907e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.204e-01  8.030e-02  1.140e-02]]\n",
      "mean_state_value 0.03850964931328463\n",
      "episode 509/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.917e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.214e-01  8.070e-02  1.150e-02]]\n",
      "mean_state_value 0.03860426564024578\n",
      "episode 510/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.927e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.223e-01  8.100e-02  1.150e-02]]\n",
      "mean_state_value 0.038698920186826434\n",
      "episode 511/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.937e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.233e-01  8.140e-02  1.160e-02]]\n",
      "mean_state_value 0.03879361295225235\n",
      "episode 512/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.947e-01  5.180e-02  5.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.243e-01  8.170e-02  1.170e-02]]\n",
      "mean_state_value 0.03888834393574931\n",
      "episode 513/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.957e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.252e-01  8.200e-02  1.170e-02]]\n",
      "mean_state_value 0.03898311313654308\n",
      "episode 514/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.967e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.262e-01  8.240e-02  1.180e-02]]\n",
      "mean_state_value 0.03907792055385946\n",
      "episode 515/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.976e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.271e-01  8.270e-02  1.190e-02]]\n",
      "mean_state_value 0.039172766186924245\n",
      "episode 516/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.986e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.281e-01  8.310e-02  1.190e-02]]\n",
      "mean_state_value 0.03926765003496327\n",
      "episode 517/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  3.996e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.291e-01  8.340e-02  1.200e-02]]\n",
      "mean_state_value 0.039362502553414995\n",
      "episode 518/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.006e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.300e-01  8.370e-02  1.210e-02]]\n",
      "mean_state_value 0.03945746282907997\n",
      "episode 519/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.016e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.310e-01  8.410e-02  1.220e-02]]\n",
      "mean_state_value 0.03955246131739669\n",
      "episode 520/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.026e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.319e-01  8.440e-02  1.220e-02]]\n",
      "mean_state_value 0.03964742854334747\n",
      "episode 521/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.036e-01  5.180e-02  6.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.329e-01  8.480e-02  1.230e-02]]\n",
      "mean_state_value 0.03974250345464528\n",
      "episode 522/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.046e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.339e-01  8.510e-02  1.240e-02]]\n",
      "mean_state_value 0.039837547171503146\n",
      "episode 523/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.055e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.348e-01  8.550e-02  1.250e-02]]\n",
      "mean_state_value 0.039932698502685604\n",
      "episode 524/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.065e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.358e-01  8.580e-02  1.250e-02]]\n",
      "mean_state_value 0.040027888042649236\n",
      "episode 525/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.075e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.367e-01  8.620e-02  1.260e-02]]\n",
      "mean_state_value 0.040123046455255426\n",
      "episode 526/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.085e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.377e-01  8.650e-02  1.270e-02]]\n",
      "mean_state_value 0.04021831241045919\n",
      "episode 527/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.095e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.387e-01  8.680e-02  1.270e-02]]\n",
      "mean_state_value 0.04031361657212194\n",
      "episode 528/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.105e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.396e-01  8.720e-02  1.280e-02]]\n",
      "mean_state_value 0.04040895893946961\n",
      "episode 529/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.115e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.406e-01  8.750e-02  1.290e-02]]\n",
      "mean_state_value 0.04050433951172819\n",
      "episode 530/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.125e-01  5.180e-02  7.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.415e-01  8.790e-02  1.300e-02]]\n",
      "mean_state_value 0.040599758288123636\n",
      "episode 531/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.134e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.425e-01  8.820e-02  1.300e-02]]\n",
      "mean_state_value 0.04069514600185279\n",
      "episode 532/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.144e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.435e-01  8.860e-02  1.310e-02]]\n",
      "mean_state_value 0.0407906411842\n",
      "episode 533/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.154e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.444e-01  8.890e-02  1.320e-02]]\n",
      "mean_state_value 0.04088617456836207\n",
      "episode 534/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.164e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.454e-01  8.930e-02  1.330e-02]]\n",
      "mean_state_value 0.04098174615356509\n",
      "episode 535/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.174e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.463e-01  8.960e-02  1.330e-02]]\n",
      "mean_state_value 0.04107735593903505\n",
      "episode 536/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.184e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.473e-01  9.000e-02  1.340e-02]]\n",
      "mean_state_value 0.04117300392399801\n",
      "episode 537/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.194e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.483e-01  9.040e-02  1.350e-02]]\n",
      "mean_state_value 0.041268690107680035\n",
      "episode 538/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.204e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.492e-01  9.070e-02  1.360e-02]]\n",
      "mean_state_value 0.041364414489307194\n",
      "episode 539/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.213e-01  5.180e-02  8.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.502e-01  9.110e-02  1.370e-02]]\n",
      "mean_state_value 0.041460177068105585\n",
      "episode 540/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.223e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.511e-01  9.140e-02  1.370e-02]]\n",
      "mean_state_value 0.041555977843301284\n",
      "episode 541/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.223e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.521e-01  9.180e-02  1.380e-02]]\n",
      "mean_state_value 0.041611487084275006\n",
      "episode 542/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.233e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.531e-01  9.210e-02  1.390e-02]]\n",
      "mean_state_value 0.04170732549164346\n",
      "episode 543/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.243e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.540e-01  9.250e-02  1.400e-02]]\n",
      "mean_state_value 0.041803202093867106\n",
      "episode 544/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.253e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.550e-01  9.280e-02  1.400e-02]]\n",
      "mean_state_value 0.041899116890172054\n",
      "episode 545/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.263e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.559e-01  9.320e-02  1.410e-02]]\n",
      "mean_state_value 0.04199431765777525\n",
      "episode 546/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.273e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.569e-01  9.360e-02  1.420e-02]]\n",
      "mean_state_value 0.04209031634745073\n",
      "episode 547/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.282e-01  5.180e-02  9.000e-04]\n",
      " [-0.000e+00  1.800e-03  4.579e-01  9.390e-02  1.430e-02]]\n",
      "mean_state_value 0.042186345721362234\n",
      "episode 548/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.292e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.588e-01  9.430e-02  1.440e-02]]\n",
      "mean_state_value 0.042282344158693294\n",
      "episode 549/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.302e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.598e-01  9.460e-02  1.440e-02]]\n",
      "mean_state_value 0.04237844991380291\n",
      "episode 550/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.312e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.607e-01  9.500e-02  1.450e-02]]\n",
      "mean_state_value 0.04247459385835085\n",
      "episode 551/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.322e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.617e-01  9.540e-02  1.460e-02]]\n",
      "mean_state_value 0.04257077599156335\n",
      "episode 552/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.332e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.627e-01  9.570e-02  1.470e-02]]\n",
      "mean_state_value 0.04266699631266664\n",
      "episode 553/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.342e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.636e-01  9.610e-02  1.480e-02]]\n",
      "mean_state_value 0.04276325482088694\n",
      "episode 554/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.352e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.646e-01  9.650e-02  1.480e-02]]\n",
      "mean_state_value 0.0428594824570117\n",
      "episode 555/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.361e-01  5.180e-02  1.000e-03]\n",
      " [-0.000e+00  1.800e-03  4.655e-01  9.680e-02  1.490e-02]]\n",
      "mean_state_value 0.042955748347764416\n",
      "episode 556/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.371e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.665e-01  9.720e-02  1.500e-02]]\n",
      "mean_state_value 0.043052121412693316\n",
      "episode 557/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04 -0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.381e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.675e-01  9.760e-02  1.510e-02]]\n",
      "mean_state_value 0.04314846374125328\n",
      "episode 558/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.391e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.684e-01  9.790e-02  1.520e-02]]\n",
      "mean_state_value 0.04324491317345263\n",
      "episode 559/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.401e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.694e-01  9.830e-02  1.530e-02]]\n",
      "mean_state_value 0.04334140078812666\n",
      "episode 560/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.411e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.703e-01  9.870e-02  1.530e-02]]\n",
      "mean_state_value 0.0434379265845017\n",
      "episode 561/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.421e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.713e-01  9.900e-02  1.540e-02]]\n",
      "mean_state_value 0.04353449056180406\n",
      "episode 562/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.431e-01  5.180e-02  1.100e-03]\n",
      " [-0.000e+00  1.800e-03  4.723e-01  9.940e-02  1.550e-02]]\n",
      "mean_state_value 0.04363102386778947\n",
      "episode 563/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.440e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.732e-01  9.980e-02  1.560e-02]]\n",
      "mean_state_value 0.0437269818625519\n",
      "episode 564/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.450e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.742e-01  1.001e-01  1.570e-02]]\n",
      "mean_state_value 0.04382366037800122\n",
      "episode 565/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.460e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.751e-01  1.005e-01  1.580e-02]]\n",
      "mean_state_value 0.04392037707128332\n",
      "episode 566/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.470e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.761e-01  1.009e-01  1.580e-02]]\n",
      "mean_state_value 0.044017063159005415\n",
      "episode 567/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.480e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.771e-01  1.012e-01  1.590e-02]]\n",
      "mean_state_value 0.0441138562056322\n",
      "episode 568/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.490e-01  5.180e-02  1.200e-03]\n",
      " [-0.000e+00  1.800e-03  4.780e-01  1.016e-01  1.600e-02]]\n",
      "mean_state_value 0.04421068742777093\n",
      "episode 569/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.20e-03 -4.00e-03 -3.90e-03 -1.00e-03 -1.80e-03]\n",
      " [-1.00e-03 -1.20e-03 -1.70e-03 -1.40e-03 -5.00e-04]\n",
      " [-3.00e-04 -9.00e-04  2.58e-02 -7.00e-04  0.00e+00]\n",
      " [-2.00e-04 -2.00e-04  4.50e-01  5.18e-02  1.20e-03]\n",
      " [-0.00e+00  1.80e-03  4.79e-01  1.02e-01  1.61e-02]]\n",
      "mean_state_value 0.04430755682464805\n",
      "episode 570/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.510e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.799e-01  1.024e-01  1.620e-02]]\n",
      "mean_state_value 0.04440446439548999\n",
      "episode 571/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.519e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.809e-01  1.027e-01  1.630e-02]]\n",
      "mean_state_value 0.0445014101395232\n",
      "episode 572/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.529e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.819e-01  1.031e-01  1.640e-02]]\n",
      "mean_state_value 0.04459839536773705\n",
      "episode 573/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.539e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.828e-01  1.035e-01  1.640e-02]]\n",
      "mean_state_value 0.044695348741995634\n",
      "episode 574/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.549e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.838e-01  1.039e-01  1.650e-02]]\n",
      "mean_state_value 0.044792409000961396\n",
      "episode 575/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.559e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.847e-01  1.042e-01  1.660e-02]]\n",
      "mean_state_value 0.04488950743002434\n",
      "episode 576/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.569e-01  5.180e-02  1.300e-03]\n",
      " [-0.000e+00  1.800e-03  4.857e-01  1.046e-01  1.670e-02]]\n",
      "mean_state_value 0.044986575383288256\n",
      "episode 577/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.579e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.867e-01  1.050e-01  1.680e-02]]\n",
      "mean_state_value 0.04508375015022504\n",
      "episode 578/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.589e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.876e-01  1.054e-01  1.690e-02]]\n",
      "mean_state_value 0.045180963084938526\n",
      "episode 579/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.598e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.886e-01  1.057e-01  1.700e-02]]\n",
      "mean_state_value 0.045278214186655265\n",
      "episode 580/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.608e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.895e-01  1.061e-01  1.710e-02]]\n",
      "mean_state_value 0.045375503473072606\n",
      "episode 581/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.618e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.905e-01  1.065e-01  1.710e-02]]\n",
      "mean_state_value 0.0454728309064755\n",
      "episode 582/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.628e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.915e-01  1.069e-01  1.720e-02]]\n",
      "mean_state_value 0.04556943325259189\n",
      "episode 583/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.638e-01  5.180e-02  1.400e-03]\n",
      " [-0.000e+00  1.800e-03  4.924e-01  1.073e-01  1.730e-02]]\n",
      "mean_state_value 0.04566676843810967\n",
      "episode 584/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  0.000e+00]\n",
      " [-2.000e-04 -2.000e-04  4.648e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.934e-01  1.076e-01  1.740e-02]]\n",
      "mean_state_value 0.04576421036324115\n",
      "episode 585/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.658e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.943e-01  1.080e-01  1.750e-02]]\n",
      "mean_state_value 0.04586169045073535\n",
      "episode 586/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.668e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.953e-01  1.084e-01  1.760e-02]]\n",
      "mean_state_value 0.045959140191917804\n",
      "episode 587/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.677e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.963e-01  1.088e-01  1.770e-02]]\n",
      "mean_state_value 0.04605669660181734\n",
      "episode 588/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.687e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.972e-01  1.092e-01  1.780e-02]]\n",
      "mean_state_value 0.04615429117175949\n",
      "episode 589/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.697e-01  5.180e-02  1.500e-03]\n",
      " [-0.000e+00  1.800e-03  4.982e-01  1.096e-01  1.790e-02]]\n",
      "mean_state_value 0.04625192396596076\n",
      "episode 590/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.707e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  4.991e-01  1.099e-01  1.800e-02]]\n",
      "mean_state_value 0.04634959485366749\n",
      "episode 591/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.717e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.001e-01  1.103e-01  1.810e-02]]\n",
      "mean_state_value 0.04644723545970366\n",
      "episode 592/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 2]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.727e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.011e-01  1.107e-01  1.810e-02]]\n",
      "mean_state_value 0.04654498268247349\n",
      "episode 593/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.737e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.020e-01  1.111e-01  1.820e-02]]\n",
      "mean_state_value 0.04664276804102819\n",
      "episode 594/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[2 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.747e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.030e-01  1.115e-01  1.830e-02]]\n",
      "mean_state_value 0.04674051515593924\n",
      "episode 595/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.756e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.040e-01  1.119e-01  1.840e-02]]\n",
      "mean_state_value 0.04683837682452612\n",
      "episode 596/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.766e-01  5.180e-02  1.600e-03]\n",
      " [-0.000e+00  1.800e-03  5.049e-01  1.123e-01  1.850e-02]]\n",
      "mean_state_value 0.046936208276015405\n",
      "episode 597/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.776e-01  5.180e-02  1.700e-03]\n",
      " [-0.000e+00  1.800e-03  5.059e-01  1.126e-01  1.860e-02]]\n",
      "mean_state_value 0.04703414625154143\n",
      "episode 598/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.786e-01  5.180e-02  1.700e-03]\n",
      " [-0.000e+00  1.800e-03  5.068e-01  1.130e-01  1.870e-02]]\n",
      "mean_state_value 0.04713212237937716\n",
      "episode 599/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 1 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏫️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.200e-03 -4.000e-03 -3.900e-03 -1.000e-03 -1.800e-03]\n",
      " [-1.000e-03 -1.200e-03 -1.700e-03 -1.400e-03 -5.000e-04]\n",
      " [-3.000e-04 -9.000e-04  2.580e-02 -7.000e-04  1.000e-04]\n",
      " [-2.000e-04 -2.000e-04  4.796e-01  5.180e-02  1.700e-03]\n",
      " [-0.000e+00  1.800e-03  5.078e-01  1.134e-01  1.880e-02]]\n",
      "mean_state_value 0.047230136658749367\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<GridWorld_v2.GridWorld_v2 at 0x1e5762b4c80>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "id": "a1a7b90a-5a20-47df-bc26-fff11a956772",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:29.302745Z",
     "start_time": "2025-04-28T08:27:29.300741Z"
    }
   },
   "source": [],
   "outputs": [],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "id": "dfc4c5cb-279f-4ad1-9d60-5253ba9f5b71",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:27:29.304752Z",
     "start_time": "2025-04-28T08:27:29.303249Z"
    }
   },
   "source": [],
   "outputs": [],
   "execution_count": 4
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
